aboutsummaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
author Thomas Schwinge <thomas@codesourcery.com> 2015-01-15 21:11:12 +0100
committer Thomas Schwinge <tschwinge@gcc.gnu.org> 2015-01-15 21:11:12 +0100
commit 41dbbb3789850dfea98dd8984f69806284f87b6e (patch)
tree 97a0bb274cc7583206397ba37ab5c0bbe01cb04d /libgomp
parent 96a87981994da859c17259d8c4dccb6602476b0e (diff)
download gcc-41dbbb3789850dfea98dd8984f69806284f87b6e.zip
gcc-41dbbb3789850dfea98dd8984f69806284f87b6e.tar.gz
gcc-41dbbb3789850dfea98dd8984f69806284f87b6e.tar.bz2
Merge current set of OpenACC changes from gomp-4_0-branch.
contrib/ * gcc_update (files_and_dependencies): Update rules for new libgomp/plugin/Makefrag.am and libgomp/plugin/configfrag.ac files. gcc/ * builtin-types.def (BT_FN_VOID_INT_INT_VAR) (BT_FN_VOID_INT_PTR_SIZE_PTR_PTR_PTR_INT_INT_VAR) (BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR): New function types. * builtins.c: Include "gomp-constants.h". (expand_builtin_acc_on_device): New function. (expand_builtin, is_inexpensive_builtin): Handle BUILT_IN_ACC_ON_DEVICE. * builtins.def (DEF_GOACC_BUILTIN, DEF_GOACC_BUILTIN_COMPILER): New macros. * cgraph.c (cgraph_node::create): Consider flag_openacc next to flag_openmp. * config.gcc <nvptx-*> (tm_file): Add nvptx/offload.h. <*-intelmic-* | *-intelmicemul-*> (tm_file): Add i386/intelmic-offload.h. * gcc.c (LINK_COMMAND_SPEC, GOMP_SELF_SPECS): For -fopenacc, link to libgomp and its dependencies. * config/arc/arc.h (LINK_COMMAND_SPEC): Likewise. * config/darwin.h (LINK_COMMAND_SPEC_A): Likewise. * config/i386/mingw32.h (GOMP_SELF_SPECS): Likewise. * config/ia64/hpux.h (LIB_SPEC): Likewise. * config/pa/pa-hpux11.h (LIB_SPEC): Likewise. * config/pa/pa64-hpux.h (LIB_SPEC): Likewise. * doc/generic.texi: Update for OpenACC changes. * doc/gimple.texi: Likewise. * doc/invoke.texi: Likewise. * doc/sourcebuild.texi: Likewise. * gimple-pretty-print.c (dump_gimple_omp_for): Handle GF_OMP_FOR_KIND_OACC_LOOP. (dump_gimple_omp_target): Handle GF_OMP_TARGET_KIND_OACC_KERNELS, GF_OMP_TARGET_KIND_OACC_PARALLEL, GF_OMP_TARGET_KIND_OACC_DATA, GF_OMP_TARGET_KIND_OACC_UPDATE, GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA. Dump more data. * gimple.c: Update comments for OpenACC changes. * gimple.def: Likewise. * gimple.h: Likewise. (enum gf_mask): Add GF_OMP_FOR_KIND_OACC_LOOP, GF_OMP_TARGET_KIND_OACC_PARALLEL, GF_OMP_TARGET_KIND_OACC_KERNELS, GF_OMP_TARGET_KIND_OACC_DATA, GF_OMP_TARGET_KIND_OACC_UPDATE, GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA. (gimple_omp_for_cond, gimple_omp_for_set_cond): Sort in the appropriate place. 
(is_gimple_omp_oacc, is_gimple_omp_offloaded): New functions. * gimplify.c: Include "gomp-constants.h". Update comments for OpenACC changes. (is_gimple_stmt): Handle OACC_PARALLEL, OACC_KERNELS, OACC_DATA, OACC_HOST_DATA, OACC_DECLARE, OACC_UPDATE, OACC_ENTER_DATA, OACC_EXIT_DATA, OACC_CACHE, OACC_LOOP. (gimplify_scan_omp_clauses, gimplify_adjust_omp_clauses): Handle OMP_CLAUSE__CACHE_, OMP_CLAUSE_ASYNC, OMP_CLAUSE_WAIT, OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS, OMP_CLAUSE_VECTOR_LENGTH, OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_DEVICE_RESIDENT, OMP_CLAUSE_USE_DEVICE, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO, OMP_CLAUSE_SEQ. (gimplify_adjust_omp_clauses_1, gimplify_adjust_omp_clauses): Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. Use OMP_CLAUSE_SET_MAP_KIND. (gimplify_oacc_cache): New function. (gimplify_omp_for): Handle OACC_LOOP. (gimplify_omp_workshare): Handle OACC_KERNELS, OACC_PARALLEL, OACC_DATA. (gimplify_omp_target_update): Handle OACC_ENTER_DATA, OACC_EXIT_DATA, OACC_UPDATE. (gimplify_expr): Handle OACC_LOOP, OACC_CACHE, OACC_HOST_DATA, OACC_DECLARE, OACC_KERNELS, OACC_PARALLEL, OACC_DATA, OACC_ENTER_DATA, OACC_EXIT_DATA, OACC_UPDATE. (gimplify_body): Consider flag_openacc next to flag_openmp. * lto-streamer-out.c: Include "gomp-constants.h". * omp-builtins.def (BUILT_IN_ACC_GET_DEVICE_TYPE) (BUILT_IN_GOACC_DATA_START, BUILT_IN_GOACC_DATA_END) (BUILT_IN_GOACC_ENTER_EXIT_DATA, BUILT_IN_GOACC_PARALLEL) (BUILT_IN_GOACC_UPDATE, BUILT_IN_GOACC_WAIT) (BUILT_IN_GOACC_GET_THREAD_NUM, BUILT_IN_GOACC_GET_NUM_THREADS) (BUILT_IN_ACC_ON_DEVICE): New builtins. * omp-low.c: Include "gomp-constants.h". Update comments for OpenACC changes. (struct omp_context): Add reduction_map, gwv_below, gwv_this members. 
(extract_omp_for_data, use_pointer_for_field, install_var_field) (new_omp_context, delete_omp_context, scan_sharing_clauses) (create_omp_child_function, scan_omp_for, scan_omp_target) (check_omp_nesting_restrictions, lower_reduction_clauses) (build_omp_regions_1, diagnose_sb_0, make_gimple_omp_edges): Update for OpenACC changes. (scan_sharing_clauses): Handle OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS, OMP_CLAUSE_VECTOR_LENGTH, OMP_CLAUSE_ASYNC, OMP_CLAUSE_WAIT, OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_DEVICE_RESIDENT, OMP_CLAUSE_USE_DEVICE, OMP_CLAUSE__CACHE_, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO, OMP_CLAUSE_SEQ. Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. (expand_omp_for_static_nochunk, expand_omp_for_static_chunk): Handle GF_OMP_FOR_KIND_OACC_LOOP. (expand_omp_target, lower_omp_target): Handle GF_OMP_TARGET_KIND_OACC_PARALLEL, GF_OMP_TARGET_KIND_OACC_KERNELS, GF_OMP_TARGET_KIND_OACC_UPDATE, GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA, GF_OMP_TARGET_KIND_OACC_DATA. (pass_expand_omp::execute, execute_lower_omp) (pass_diagnose_omp_blocks::gate): Consider flag_openacc next to flag_openmp. (offload_symbol_decl): New variable. (oacc_get_reduction_array_id, oacc_max_threads) (get_offload_symbol_decl, get_base_type, lookup_oacc_reduction) (maybe_lookup_oacc_reduction, enclosing_target_ctx) (oacc_loop_or_target_p, oacc_lower_reduction_var_helper) (oacc_gimple_assign, oacc_initialize_reduction_data) (oacc_finalize_reduction_data, oacc_process_reduction_data): New functions. (is_targetreg_ctx): Remove function. * tree-core.h (enum omp_clause_code): Add OMP_CLAUSE__CACHE_, OMP_CLAUSE_DEVICE_RESIDENT, OMP_CLAUSE_USE_DEVICE, OMP_CLAUSE_GANG, OMP_CLAUSE_ASYNC, OMP_CLAUSE_WAIT, OMP_CLAUSE_AUTO, OMP_CLAUSE_SEQ, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS, OMP_CLAUSE_VECTOR_LENGTH. * tree.c (omp_clause_code_name, walk_tree_1): Update accordingly. 
* tree.h (OMP_CLAUSE_GANG_EXPR, OMP_CLAUSE_GANG_STATIC_EXPR) (OMP_CLAUSE_ASYNC_EXPR, OMP_CLAUSE_WAIT_EXPR) (OMP_CLAUSE_VECTOR_EXPR, OMP_CLAUSE_WORKER_EXPR) (OMP_CLAUSE_NUM_GANGS_EXPR, OMP_CLAUSE_NUM_WORKERS_EXPR) (OMP_CLAUSE_VECTOR_LENGTH_EXPR): New macros. * tree-core.h: Update comments for OpenACC changes. (enum omp_clause_map_kind): Remove. (struct tree_omp_clause): Change type of map_kind member from enum omp_clause_map_kind to unsigned char. * tree-inline.c: Update comments for OpenACC changes. * tree-nested.c: Likewise. Include "gomp-constants.h". (convert_nonlocal_reference_stmt, convert_local_reference_stmt) (convert_tramp_reference_stmt, convert_gimple_call): Update for OpenACC changes. Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. Use OMP_CLAUSE_SET_MAP_KIND. * tree-pretty-print.c: Include "gomp-constants.h". (dump_omp_clause): Handle OMP_CLAUSE_DEVICE_RESIDENT, OMP_CLAUSE_USE_DEVICE, OMP_CLAUSE__CACHE_, OMP_CLAUSE_GANG, OMP_CLAUSE_ASYNC, OMP_CLAUSE_AUTO, OMP_CLAUSE_SEQ, OMP_CLAUSE_WAIT, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS, OMP_CLAUSE_VECTOR_LENGTH, OMP_CLAUSE_INDEPENDENT. Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. (dump_generic_node): Handle OACC_PARALLEL, OACC_KERNELS, OACC_DATA, OACC_HOST_DATA, OACC_DECLARE, OACC_UPDATE, OACC_ENTER_DATA, OACC_EXIT_DATA, OACC_CACHE, OACC_LOOP. * tree-streamer-in.c: Include "gomp-constants.h". (unpack_ts_omp_clause_value_fields): Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. Use OMP_CLAUSE_SET_MAP_KIND. * tree-streamer-out.c: Include "gomp-constants.h". (pack_ts_omp_clause_value_fields): Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. * tree.def (OACC_PARALLEL, OACC_KERNELS, OACC_DATA) (OACC_HOST_DATA, OACC_LOOP, OACC_CACHE, OACC_DECLARE) (OACC_ENTER_DATA, OACC_EXIT_DATA, OACC_UPDATE): New tree codes. * tree.c (omp_clause_num_ops): Update accordingly. * tree.h (OMP_BODY, OMP_CLAUSES, OMP_LOOP_CHECK, OMP_CLAUSE_SIZE): Likewise. 
(OACC_PARALLEL_BODY, OACC_PARALLEL_CLAUSES, OACC_KERNELS_BODY) (OACC_KERNELS_CLAUSES, OACC_DATA_BODY, OACC_DATA_CLAUSES) (OACC_HOST_DATA_BODY, OACC_HOST_DATA_CLAUSES, OACC_CACHE_CLAUSES) (OACC_DECLARE_CLAUSES, OACC_ENTER_DATA_CLAUSES) (OACC_EXIT_DATA_CLAUSES, OACC_UPDATE_CLAUSES) (OACC_KERNELS_COMBINED, OACC_PARALLEL_COMBINED): New macros. * tree.h (OMP_CLAUSE_MAP_KIND): Cast it to enum gomp_map_kind. (OMP_CLAUSE_SET_MAP_KIND): New macro. * varpool.c (varpool_node::get_create): Consider flag_openacc next to flag_openmp. * config/i386/intelmic-offload.h: New file. * config/nvptx/offload.h: Likewise. gcc/ada/ * gcc-interface/utils.c (DEF_FUNCTION_TYPE_VAR_8) (DEF_FUNCTION_TYPE_VAR_12): New macros. gcc/c-family/ * c.opt (fopenacc): New option. * c-cppbuiltin.c (c_cpp_builtins): Conditionally define _OPENACC. * c-common.c (DEF_FUNCTION_TYPE_VAR_8, DEF_FUNCTION_TYPE_VAR_12): New macros. * c-common.h (c_finish_oacc_wait): New prototype. * c-omp.c: Include "omp-low.h" and "gomp-constants.h". (c_finish_oacc_wait): New function. * c-pragma.c (oacc_pragmas): New variable. (c_pp_lookup_pragma, init_pragma): Handle it. * c-pragma.h (enum pragma_kind): Add PRAGMA_OACC_CACHE, PRAGMA_OACC_DATA, PRAGMA_OACC_ENTER_DATA, PRAGMA_OACC_EXIT_DATA, PRAGMA_OACC_KERNELS, PRAGMA_OACC_LOOP, PRAGMA_OACC_PARALLEL, PRAGMA_OACC_UPDATE, PRAGMA_OACC_WAIT. 
(enum pragma_omp_clause): Add PRAGMA_OACC_CLAUSE_ASYNC, PRAGMA_OACC_CLAUSE_AUTO, PRAGMA_OACC_CLAUSE_COLLAPSE, PRAGMA_OACC_CLAUSE_COPY, PRAGMA_OACC_CLAUSE_COPYIN, PRAGMA_OACC_CLAUSE_COPYOUT, PRAGMA_OACC_CLAUSE_CREATE, PRAGMA_OACC_CLAUSE_DELETE, PRAGMA_OACC_CLAUSE_DEVICE, PRAGMA_OACC_CLAUSE_DEVICEPTR, PRAGMA_OACC_CLAUSE_FIRSTPRIVATE, PRAGMA_OACC_CLAUSE_GANG, PRAGMA_OACC_CLAUSE_HOST, PRAGMA_OACC_CLAUSE_IF, PRAGMA_OACC_CLAUSE_NUM_GANGS, PRAGMA_OACC_CLAUSE_NUM_WORKERS, PRAGMA_OACC_CLAUSE_PRESENT, PRAGMA_OACC_CLAUSE_PRESENT_OR_COPY, PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYIN, PRAGMA_OACC_CLAUSE_PRESENT_OR_COPYOUT, PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE, PRAGMA_OACC_CLAUSE_PRIVATE, PRAGMA_OACC_CLAUSE_REDUCTION, PRAGMA_OACC_CLAUSE_SELF, PRAGMA_OACC_CLAUSE_SEQ, PRAGMA_OACC_CLAUSE_VECTOR, PRAGMA_OACC_CLAUSE_VECTOR_LENGTH, PRAGMA_OACC_CLAUSE_WAIT, PRAGMA_OACC_CLAUSE_WORKER. gcc/c/ * c-parser.c: Include "gomp-constants.h". (c_parser_omp_clause_map): Use enum gomp_map_kind instead of enum omp_clause_map_kind. Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. Use OMP_CLAUSE_SET_MAP_KIND. (c_parser_pragma): Handle PRAGMA_OACC_ENTER_DATA, PRAGMA_OACC_EXIT_DATA, PRAGMA_OACC_UPDATE. (c_parser_omp_construct): Handle PRAGMA_OACC_CACHE, PRAGMA_OACC_DATA, PRAGMA_OACC_KERNELS, PRAGMA_OACC_LOOP, PRAGMA_OACC_PARALLEL, PRAGMA_OACC_WAIT. (c_parser_omp_clause_name): Handle "auto", "async", "copy", "copyout", "create", "delete", "deviceptr", "gang", "host", "num_gangs", "num_workers", "present", "present_or_copy", "pcopy", "present_or_copyin", "pcopyin", "present_or_copyout", "pcopyout", "present_or_create", "pcreate", "seq", "self", "vector", "vector_length", "wait", "worker". (OACC_DATA_CLAUSE_MASK, OACC_KERNELS_CLAUSE_MASK) (OACC_ENTER_DATA_CLAUSE_MASK, OACC_EXIT_DATA_CLAUSE_MASK) (OACC_LOOP_CLAUSE_MASK, OACC_PARALLEL_CLAUSE_MASK) (OACC_UPDATE_CLAUSE_MASK, OACC_WAIT_CLAUSE_MASK): New macros. (c_parser_omp_variable_list): Handle OMP_CLAUSE__CACHE_. 
(c_parser_oacc_wait_list, c_parser_oacc_data_clause) (c_parser_oacc_data_clause_deviceptr) (c_parser_omp_clause_num_gangs, c_parser_omp_clause_num_workers) (c_parser_oacc_clause_async, c_parser_oacc_clause_wait) (c_parser_omp_clause_vector_length, c_parser_oacc_all_clauses) (c_parser_oacc_cache, c_parser_oacc_data, c_parser_oacc_kernels) (c_parser_oacc_enter_exit_data, c_parser_oacc_loop) (c_parser_oacc_parallel, c_parser_oacc_update) (c_parser_oacc_wait): New functions. * c-tree.h (c_finish_oacc_parallel, c_finish_oacc_kernels) (c_finish_oacc_data): New prototypes. * c-typeck.c: Include "gomp-constants.h". (handle_omp_array_sections): Handle GOMP_MAP_FORCE_DEVICEPTR. Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. Use OMP_CLAUSE_SET_MAP_KIND. (c_finish_oacc_parallel, c_finish_oacc_kernels) (c_finish_oacc_data): New functions. (c_finish_omp_clauses): Handle OMP_CLAUSE__CACHE_, OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS, OMP_CLAUSE_VECTOR_LENGTH, OMP_CLAUSE_ASYNC, OMP_CLAUSE_WAIT, OMP_CLAUSE_AUTO, OMP_CLAUSE_SEQ, OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, and OMP_CLAUSE_MAP's GOMP_MAP_FORCE_DEVICEPTR. gcc/cp/ * parser.c: Include "gomp-constants.h". (cp_parser_omp_clause_map): Use enum gomp_map_kind instead of enum omp_clause_map_kind. Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. Use OMP_CLAUSE_SET_MAP_KIND. (cp_parser_omp_construct, cp_parser_pragma): Handle PRAGMA_OACC_CACHE, PRAGMA_OACC_DATA, PRAGMA_OACC_ENTER_DATA, PRAGMA_OACC_EXIT_DATA, PRAGMA_OACC_KERNELS, PRAGMA_OACC_PARALLEL, PRAGMA_OACC_LOOP, PRAGMA_OACC_UPDATE, PRAGMA_OACC_WAIT. (cp_parser_omp_clause_name): Handle "async", "copy", "copyout", "create", "delete", "deviceptr", "host", "num_gangs", "num_workers", "present", "present_or_copy", "pcopy", "present_or_copyin", "pcopyin", "present_or_copyout", "pcopyout", "present_or_create", "pcreate", "vector_length", "wait". 
(OACC_DATA_CLAUSE_MASK, OACC_ENTER_DATA_CLAUSE_MASK) (OACC_EXIT_DATA_CLAUSE_MASK, OACC_KERNELS_CLAUSE_MASK) (OACC_LOOP_CLAUSE_MASK, OACC_PARALLEL_CLAUSE_MASK) (OACC_UPDATE_CLAUSE_MASK, OACC_WAIT_CLAUSE_MASK): New macros. (cp_parser_omp_var_list_no_open): Handle OMP_CLAUSE__CACHE_. (cp_parser_oacc_data_clause, cp_parser_oacc_data_clause_deviceptr) (cp_parser_oacc_clause_vector_length, cp_parser_oacc_wait_list) (cp_parser_oacc_clause_wait, cp_parser_omp_clause_num_gangs) (cp_parser_omp_clause_num_workers, cp_parser_oacc_clause_async) (cp_parser_oacc_all_clauses, cp_parser_oacc_cache) (cp_parser_oacc_data, cp_parser_oacc_enter_exit_data) (cp_parser_oacc_kernels, cp_parser_oacc_loop) (cp_parser_oacc_parallel, cp_parser_oacc_update) (cp_parser_oacc_wait): New functions. * cp-tree.h (finish_oacc_data, finish_oacc_kernels) (finish_oacc_parallel): New prototypes. * semantics.c: Include "gomp-constants.h". (handle_omp_array_sections): Handle GOMP_MAP_FORCE_DEVICEPTR. Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. Use OMP_CLAUSE_SET_MAP_KIND. (finish_omp_clauses): Handle OMP_CLAUSE_ASYNC, OMP_CLAUSE_VECTOR_LENGTH, OMP_CLAUSE_WAIT, OMP_CLAUSE__CACHE_. Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. (finish_oacc_data, finish_oacc_kernels, finish_oacc_parallel): New functions. gcc/fortran/ * lang.opt (fopenacc): New option. * cpp.c (cpp_define_builtins): Conditionally define _OPENACC. * dump-parse-tree.c (show_omp_node): Split part of it into... (show_omp_clauses): ... this new function. (show_omp_node, show_code_node): Handle EXEC_OACC_PARALLEL_LOOP, EXEC_OACC_PARALLEL, EXEC_OACC_KERNELS_LOOP, EXEC_OACC_KERNELS, EXEC_OACC_DATA, EXEC_OACC_HOST_DATA, EXEC_OACC_LOOP, EXEC_OACC_UPDATE, EXEC_OACC_WAIT, EXEC_OACC_CACHE, EXEC_OACC_ENTER_DATA, EXEC_OACC_EXIT_DATA. (show_namespace): Update for OpenACC. * f95-lang.c (DEF_FUNCTION_TYPE_VAR_2, DEF_FUNCTION_TYPE_VAR_8) (DEF_FUNCTION_TYPE_VAR_12, DEF_GOACC_BUILTIN) (DEF_GOACC_BUILTIN_COMPILER): New macros. 
* types.def (BT_FN_VOID_INT_INT_VAR) (BT_FN_VOID_INT_PTR_SIZE_PTR_PTR_PTR_INT_INT_VAR) (BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR): New function types. * gfortran.h (gfc_statement): Add ST_OACC_PARALLEL_LOOP, ST_OACC_END_PARALLEL_LOOP, ST_OACC_PARALLEL, ST_OACC_END_PARALLEL, ST_OACC_KERNELS, ST_OACC_END_KERNELS, ST_OACC_DATA, ST_OACC_END_DATA, ST_OACC_HOST_DATA, ST_OACC_END_HOST_DATA, ST_OACC_LOOP, ST_OACC_END_LOOP, ST_OACC_DECLARE, ST_OACC_UPDATE, ST_OACC_WAIT, ST_OACC_CACHE, ST_OACC_KERNELS_LOOP, ST_OACC_END_KERNELS_LOOP, ST_OACC_ENTER_DATA, ST_OACC_EXIT_DATA, ST_OACC_ROUTINE. (struct gfc_expr_list): New data type. (gfc_get_expr_list): New macro. (gfc_omp_map_op): Add OMP_MAP_FORCE_ALLOC, OMP_MAP_FORCE_DEALLOC, OMP_MAP_FORCE_TO, OMP_MAP_FORCE_FROM, OMP_MAP_FORCE_TOFROM, OMP_MAP_FORCE_PRESENT, OMP_MAP_FORCE_DEVICEPTR. (OMP_LIST_FIRST, OMP_LIST_DEVICE_RESIDENT, OMP_LIST_USE_DEVICE) (OMP_LIST_CACHE): New enumerators. (struct gfc_omp_clauses): Add async_expr, gang_expr, worker_expr, vector_expr, num_gangs_expr, num_workers_expr, vector_length_expr, wait_list, tile_list, async, gang, worker, vector, seq, independent, wait, par_auto, gang_static, and loc members. (struct gfc_namespace): Add oacc_declare_clauses member. (gfc_exec_op): Add EXEC_OACC_KERNELS_LOOP, EXEC_OACC_PARALLEL_LOOP, EXEC_OACC_PARALLEL, EXEC_OACC_KERNELS, EXEC_OACC_DATA, EXEC_OACC_HOST_DATA, EXEC_OACC_LOOP, EXEC_OACC_UPDATE, EXEC_OACC_WAIT, EXEC_OACC_CACHE, EXEC_OACC_ENTER_DATA, EXEC_OACC_EXIT_DATA. (gfc_free_expr_list, gfc_resolve_oacc_directive) (gfc_resolve_oacc_declare, gfc_resolve_oacc_parallel_loop_blocks) (gfc_resolve_oacc_blocks): New prototypes. * match.c (match_exit_cycle): Handle EXEC_OACC_LOOP and EXEC_OACC_PARALLEL_LOOP. 
* match.h (gfc_match_oacc_cache, gfc_match_oacc_wait) (gfc_match_oacc_update, gfc_match_oacc_declare) (gfc_match_oacc_loop, gfc_match_oacc_host_data) (gfc_match_oacc_data, gfc_match_oacc_kernels) (gfc_match_oacc_kernels_loop, gfc_match_oacc_parallel) (gfc_match_oacc_parallel_loop, gfc_match_oacc_enter_data) (gfc_match_oacc_exit_data, gfc_match_oacc_routine): New prototypes. * openmp.c: Include "diagnostic.h" and "gomp-constants.h". (gfc_free_omp_clauses): Update for members added to struct gfc_omp_clauses. (gfc_match_omp_clauses): Change mask parameter to uint64_t. Add openacc parameter. (resolve_omp_clauses): Add openacc parameter. Update for OpenACC. (struct fortran_omp_context): Add is_openmp member. (gfc_resolve_omp_parallel_blocks): Initialize it. (gfc_resolve_do_iterator): Update for OpenACC. (gfc_resolve_omp_directive): Call resolve_omp_directive_inside_oacc_region. (OMP_CLAUSE_PRIVATE, OMP_CLAUSE_FIRSTPRIVATE) (OMP_CLAUSE_LASTPRIVATE, OMP_CLAUSE_COPYPRIVATE) (OMP_CLAUSE_SHARED, OMP_CLAUSE_COPYIN, OMP_CLAUSE_REDUCTION) (OMP_CLAUSE_IF, OMP_CLAUSE_NUM_THREADS, OMP_CLAUSE_SCHEDULE) (OMP_CLAUSE_DEFAULT, OMP_CLAUSE_ORDERED, OMP_CLAUSE_COLLAPSE) (OMP_CLAUSE_UNTIED, OMP_CLAUSE_FINAL, OMP_CLAUSE_MERGEABLE) (OMP_CLAUSE_ALIGNED, OMP_CLAUSE_DEPEND, OMP_CLAUSE_INBRANCH) (OMP_CLAUSE_LINEAR, OMP_CLAUSE_NOTINBRANCH, OMP_CLAUSE_PROC_BIND) (OMP_CLAUSE_SAFELEN, OMP_CLAUSE_SIMDLEN, OMP_CLAUSE_UNIFORM) (OMP_CLAUSE_DEVICE, OMP_CLAUSE_MAP, OMP_CLAUSE_TO) (OMP_CLAUSE_FROM, OMP_CLAUSE_NUM_TEAMS, OMP_CLAUSE_THREAD_LIMIT) (OMP_CLAUSE_DIST_SCHEDULE): Use uint64_t. 
(OMP_CLAUSE_ASYNC, OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS) (OMP_CLAUSE_VECTOR_LENGTH, OMP_CLAUSE_COPY, OMP_CLAUSE_COPYOUT) (OMP_CLAUSE_CREATE, OMP_CLAUSE_PRESENT) (OMP_CLAUSE_PRESENT_OR_COPY, OMP_CLAUSE_PRESENT_OR_COPYIN) (OMP_CLAUSE_PRESENT_OR_COPYOUT, OMP_CLAUSE_PRESENT_OR_CREATE) (OMP_CLAUSE_DEVICEPTR, OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER) (OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ, OMP_CLAUSE_INDEPENDENT) (OMP_CLAUSE_USE_DEVICE, OMP_CLAUSE_DEVICE_RESIDENT) (OMP_CLAUSE_HOST_SELF, OMP_CLAUSE_OACC_DEVICE, OMP_CLAUSE_WAIT) (OMP_CLAUSE_DELETE, OMP_CLAUSE_AUTO, OMP_CLAUSE_TILE): New macros. (gfc_match_omp_clauses): Handle those. (OACC_PARALLEL_CLAUSES, OACC_KERNELS_CLAUSES, OACC_DATA_CLAUSES) (OACC_LOOP_CLAUSES, OACC_PARALLEL_LOOP_CLAUSES) (OACC_KERNELS_LOOP_CLAUSES, OACC_HOST_DATA_CLAUSES) (OACC_DECLARE_CLAUSES, OACC_UPDATE_CLAUSES) (OACC_ENTER_DATA_CLAUSES, OACC_EXIT_DATA_CLAUSES) (OACC_WAIT_CLAUSES): New macros. (gfc_free_expr_list, match_oacc_expr_list, match_oacc_clause_gang) (gfc_match_omp_map_clause, gfc_match_oacc_parallel_loop) (gfc_match_oacc_parallel, gfc_match_oacc_kernels_loop) (gfc_match_oacc_kernels, gfc_match_oacc_data) (gfc_match_oacc_host_data, gfc_match_oacc_loop) (gfc_match_oacc_declare, gfc_match_oacc_update) (gfc_match_oacc_enter_data, gfc_match_oacc_exit_data) (gfc_match_oacc_wait, gfc_match_oacc_cache) (gfc_match_oacc_routine, oacc_is_loop) (resolve_oacc_scalar_int_expr, resolve_oacc_positive_int_expr) (check_symbol_not_pointer, check_array_not_assumed) (resolve_oacc_data_clauses, resolve_oacc_deviceptr_clause) (oacc_compatible_clauses, oacc_is_parallel, oacc_is_kernels) (omp_code_to_statement, oacc_code_to_statement) (resolve_oacc_directive_inside_omp_region) (resolve_omp_directive_inside_oacc_region) (resolve_oacc_nested_loops, resolve_oacc_params_in_parallel) (resolve_oacc_loop_blocks, gfc_resolve_oacc_blocks) (resolve_oacc_loop, resolve_oacc_cache, gfc_resolve_oacc_declare) (gfc_resolve_oacc_directive): New functions. 
* parse.c (next_free): Update for OpenACC. Move some code into... (verify_token_free): ... this new function. (next_fixed): Update for OpenACC. Move some code into... (verify_token_fixed): ... this new function. (case_executable): Add ST_OACC_UPDATE, ST_OACC_WAIT, ST_OACC_CACHE, ST_OACC_ENTER_DATA, and ST_OACC_EXIT_DATA. (case_exec_markers): Add ST_OACC_PARALLEL_LOOP, ST_OACC_PARALLEL, ST_OACC_KERNELS, ST_OACC_DATA, ST_OACC_HOST_DATA, ST_OACC_LOOP, ST_OACC_KERNELS_LOOP. (case_decl): Add ST_OACC_ROUTINE. (push_state, parse_critical_block, parse_progunit): Update for OpenACC. (gfc_ascii_statement): Handle ST_OACC_PARALLEL_LOOP, ST_OACC_END_PARALLEL_LOOP, ST_OACC_PARALLEL, ST_OACC_END_PARALLEL, ST_OACC_KERNELS, ST_OACC_END_KERNELS, ST_OACC_KERNELS_LOOP, ST_OACC_END_KERNELS_LOOP, ST_OACC_DATA, ST_OACC_END_DATA, ST_OACC_HOST_DATA, ST_OACC_END_HOST_DATA, ST_OACC_LOOP, ST_OACC_END_LOOP, ST_OACC_DECLARE, ST_OACC_UPDATE, ST_OACC_WAIT, ST_OACC_CACHE, ST_OACC_ENTER_DATA, ST_OACC_EXIT_DATA, ST_OACC_ROUTINE. (verify_st_order, parse_spec): Handle ST_OACC_DECLARE. (parse_executable): Handle ST_OACC_PARALLEL_LOOP, ST_OACC_KERNELS_LOOP, ST_OACC_LOOP, ST_OACC_PARALLEL, ST_OACC_KERNELS, ST_OACC_DATA, ST_OACC_HOST_DATA. (decode_oacc_directive, parse_oacc_structured_block) (parse_oacc_loop, is_oacc): New functions. * parse.h (struct gfc_state_data): Add oacc_declare_clauses member. (is_oacc): New prototype. * resolve.c (gfc_resolve_blocks, gfc_resolve_code): Handle EXEC_OACC_PARALLEL_LOOP, EXEC_OACC_PARALLEL, EXEC_OACC_KERNELS_LOOP, EXEC_OACC_KERNELS, EXEC_OACC_DATA, EXEC_OACC_HOST_DATA, EXEC_OACC_LOOP, EXEC_OACC_UPDATE, EXEC_OACC_WAIT, EXEC_OACC_CACHE, EXEC_OACC_ENTER_DATA, EXEC_OACC_EXIT_DATA. (resolve_codes): Call gfc_resolve_oacc_declare. * scanner.c (openacc_flag, openacc_locus): New variables. (skip_free_comments): Update for OpenACC. Move some code into... (skip_omp_attribute): ... this new function. (skip_oacc_attribute): New function. 
(skip_fixed_comments, gfc_next_char_literal): Update for OpenACC. * st.c (gfc_free_statement): Handle EXEC_OACC_PARALLEL_LOOP, EXEC_OACC_PARALLEL, EXEC_OACC_KERNELS_LOOP, EXEC_OACC_KERNELS, EXEC_OACC_DATA, EXEC_OACC_HOST_DATA, EXEC_OACC_LOOP, EXEC_OACC_UPDATE, EXEC_OACC_WAIT, EXEC_OACC_CACHE, EXEC_OACC_ENTER_DATA, EXEC_OACC_EXIT_DATA. * trans-decl.c (gfc_generate_function_code): Update for OpenACC. * trans-openmp.c: Include "gomp-constants.h". (gfc_omp_finish_clause, gfc_trans_omp_clauses): Use GOMP_MAP_* instead of OMP_CLAUSE_MAP_*. Use OMP_CLAUSE_SET_MAP_KIND. (gfc_trans_omp_clauses): Handle OMP_LIST_USE_DEVICE, OMP_LIST_DEVICE_RESIDENT, OMP_LIST_CACHE, and OMP_MAP_FORCE_ALLOC, OMP_MAP_FORCE_DEALLOC, OMP_MAP_FORCE_TO, OMP_MAP_FORCE_FROM, OMP_MAP_FORCE_TOFROM, OMP_MAP_FORCE_PRESENT, OMP_MAP_FORCE_DEVICEPTR, and gfc_omp_clauses' async, seq, independent, wait_list, num_gangs_expr, num_workers_expr, vector_length_expr, vector, vector_expr, worker, worker_expr, gang, gang_expr members. (gfc_trans_omp_do): Handle EXEC_OACC_LOOP. (gfc_convert_expr_to_tree, gfc_trans_oacc_construct) (gfc_trans_oacc_executable_directive) (gfc_trans_oacc_wait_directive, gfc_trans_oacc_combined_directive) (gfc_trans_oacc_declare, gfc_trans_oacc_directive): New functions. * trans-stmt.c (gfc_trans_block_construct): Update for OpenACC. * trans-stmt.h (gfc_trans_oacc_directive, gfc_trans_oacc_declare): New prototypes. * trans.c (trans_code): Handle EXEC_OACC_CACHE, EXEC_OACC_WAIT, EXEC_OACC_UPDATE, EXEC_OACC_LOOP, EXEC_OACC_HOST_DATA, EXEC_OACC_DATA, EXEC_OACC_KERNELS, EXEC_OACC_KERNELS_LOOP, EXEC_OACC_PARALLEL, EXEC_OACC_PARALLEL_LOOP, EXEC_OACC_ENTER_DATA, EXEC_OACC_EXIT_DATA. * gfortran.texi: Update for OpenACC. * intrinsic.texi: Likewise. * invoke.texi: Likewise. gcc/lto/ * lto-lang.c (DEF_FUNCTION_TYPE_VAR_8, DEF_FUNCTION_TYPE_VAR_12): New macros. * lto.c: Include "gomp-constants.h". gcc/testsuite/ * lib/target-supports.exp (check_effective_target_fopenacc): New procedure. 
* g++.dg/goacc-gomp/goacc-gomp.exp: New file. * g++.dg/goacc/goacc.exp: Likewise. * gcc.dg/goacc-gomp/goacc-gomp.exp: Likewise. * gcc.dg/goacc/goacc.exp: Likewise. * gfortran.dg/goacc/goacc.exp: Likewise. * c-c++-common/cpp/openacc-define-1.c: New file. * c-c++-common/cpp/openacc-define-2.c: Likewise. * c-c++-common/cpp/openacc-define-3.c: Likewise. * c-c++-common/goacc-gomp/nesting-1.c: Likewise. * c-c++-common/goacc-gomp/nesting-fail-1.c: Likewise. * c-c++-common/goacc/acc_on_device-2-off.c: Likewise. * c-c++-common/goacc/acc_on_device-2.c: Likewise. * c-c++-common/goacc/asyncwait-1.c: Likewise. * c-c++-common/goacc/cache-1.c: Likewise. * c-c++-common/goacc/clauses-fail.c: Likewise. * c-c++-common/goacc/collapse-1.c: Likewise. * c-c++-common/goacc/data-1.c: Likewise. * c-c++-common/goacc/data-2.c: Likewise. * c-c++-common/goacc/data-clause-duplicate-1.c: Likewise. * c-c++-common/goacc/deviceptr-1.c: Likewise. * c-c++-common/goacc/deviceptr-2.c: Likewise. * c-c++-common/goacc/deviceptr-3.c: Likewise. * c-c++-common/goacc/if-clause-1.c: Likewise. * c-c++-common/goacc/if-clause-2.c: Likewise. * c-c++-common/goacc/kernels-1.c: Likewise. * c-c++-common/goacc/loop-1.c: Likewise. * c-c++-common/goacc/loop-private-1.c: Likewise. * c-c++-common/goacc/nesting-1.c: Likewise. * c-c++-common/goacc/nesting-data-1.c: Likewise. * c-c++-common/goacc/nesting-fail-1.c: Likewise. * c-c++-common/goacc/parallel-1.c: Likewise. * c-c++-common/goacc/pcopy.c: Likewise. * c-c++-common/goacc/pcopyin.c: Likewise. * c-c++-common/goacc/pcopyout.c: Likewise. * c-c++-common/goacc/pcreate.c: Likewise. * c-c++-common/goacc/pragma_context.c: Likewise. * c-c++-common/goacc/present-1.c: Likewise. * c-c++-common/goacc/reduction-1.c: Likewise. * c-c++-common/goacc/reduction-2.c: Likewise. * c-c++-common/goacc/reduction-3.c: Likewise. * c-c++-common/goacc/reduction-4.c: Likewise. * c-c++-common/goacc/sb-1.c: Likewise. * c-c++-common/goacc/sb-2.c: Likewise. * c-c++-common/goacc/sb-3.c: Likewise. 
* c-c++-common/goacc/update-1.c: Likewise. * gcc.dg/goacc/acc_on_device-1.c: Likewise. * gfortran.dg/goacc/acc_on_device-1.f95: Likewise. * gfortran.dg/goacc/acc_on_device-2-off.f95: Likewise. * gfortran.dg/goacc/acc_on_device-2.f95: Likewise. * gfortran.dg/goacc/assumed.f95: Likewise. * gfortran.dg/goacc/asyncwait-1.f95: Likewise. * gfortran.dg/goacc/asyncwait-2.f95: Likewise. * gfortran.dg/goacc/asyncwait-3.f95: Likewise. * gfortran.dg/goacc/asyncwait-4.f95: Likewise. * gfortran.dg/goacc/branch.f95: Likewise. * gfortran.dg/goacc/cache-1.f95: Likewise. * gfortran.dg/goacc/coarray.f95: Likewise. * gfortran.dg/goacc/continuation-free-form.f95: Likewise. * gfortran.dg/goacc/cray.f95: Likewise. * gfortran.dg/goacc/critical.f95: Likewise. * gfortran.dg/goacc/data-clauses.f95: Likewise. * gfortran.dg/goacc/data-tree.f95: Likewise. * gfortran.dg/goacc/declare-1.f95: Likewise. * gfortran.dg/goacc/enter-exit-data.f95: Likewise. * gfortran.dg/goacc/fixed-1.f: Likewise. * gfortran.dg/goacc/fixed-2.f: Likewise. * gfortran.dg/goacc/fixed-3.f: Likewise. * gfortran.dg/goacc/fixed-4.f: Likewise. * gfortran.dg/goacc/host_data-tree.f95: Likewise. * gfortran.dg/goacc/if.f95: Likewise. * gfortran.dg/goacc/kernels-tree.f95: Likewise. * gfortran.dg/goacc/list.f95: Likewise. * gfortran.dg/goacc/literal.f95: Likewise. * gfortran.dg/goacc/loop-1.f95: Likewise. * gfortran.dg/goacc/loop-2.f95: Likewise. * gfortran.dg/goacc/loop-3.f95: Likewise. * gfortran.dg/goacc/loop-tree-1.f90: Likewise. * gfortran.dg/goacc/omp.f95: Likewise. * gfortran.dg/goacc/parallel-kernels-clauses.f95: Likewise. * gfortran.dg/goacc/parallel-kernels-regions.f95: Likewise. * gfortran.dg/goacc/parallel-tree.f95: Likewise. * gfortran.dg/goacc/parameter.f95: Likewise. * gfortran.dg/goacc/private-1.f95: Likewise. * gfortran.dg/goacc/private-2.f95: Likewise. * gfortran.dg/goacc/private-3.f95: Likewise. * gfortran.dg/goacc/pure-elemental-procedures.f95: Likewise. * gfortran.dg/goacc/reduction-2.f95: Likewise. 
* gfortran.dg/goacc/reduction.f95: Likewise. * gfortran.dg/goacc/routine-1.f90: Likewise. * gfortran.dg/goacc/routine-2.f90: Likewise. * gfortran.dg/goacc/sentinel-free-form.f95: Likewise. * gfortran.dg/goacc/several-directives.f95: Likewise. * gfortran.dg/goacc/sie.f95: Likewise. * gfortran.dg/goacc/subarrays.f95: Likewise. * gfortran.dg/gomp/map-1.f90: Likewise. * gfortran.dg/openacc-define-1.f90: Likewise. * gfortran.dg/openacc-define-2.f90: Likewise. * gfortran.dg/openacc-define-3.f90: Likewise. * g++.dg/gomp/block-1.C: Update for changed compiler output. * g++.dg/gomp/block-2.C: Likewise. * g++.dg/gomp/block-3.C: Likewise. * g++.dg/gomp/block-5.C: Likewise. * g++.dg/gomp/target-1.C: Likewise. * g++.dg/gomp/target-2.C: Likewise. * g++.dg/gomp/taskgroup-1.C: Likewise. * g++.dg/gomp/teams-1.C: Likewise. * gcc.dg/cilk-plus/jump-openmp.c: Likewise. * gcc.dg/cilk-plus/jump.c: Likewise. * gcc.dg/gomp/block-1.c: Likewise. * gcc.dg/gomp/block-10.c: Likewise. * gcc.dg/gomp/block-2.c: Likewise. * gcc.dg/gomp/block-3.c: Likewise. * gcc.dg/gomp/block-4.c: Likewise. * gcc.dg/gomp/block-5.c: Likewise. * gcc.dg/gomp/block-6.c: Likewise. * gcc.dg/gomp/block-7.c: Likewise. * gcc.dg/gomp/block-8.c: Likewise. * gcc.dg/gomp/block-9.c: Likewise. * gcc.dg/gomp/target-1.c: Likewise. * gcc.dg/gomp/target-2.c: Likewise. * gcc.dg/gomp/taskgroup-1.c: Likewise. * gcc.dg/gomp/teams-1.c: Likewise. include/ * gomp-constants.h: New file. libgomp/ * Makefile.am (search_path): Add $(top_srcdir)/../include. (libgomp_la_SOURCES): Add splay-tree.c, libgomp-plugin.c, oacc-parallel.c, oacc-host.c, oacc-init.c, oacc-mem.c, oacc-async.c, oacc-plugin.c, oacc-cuda.c. [USE_FORTRAN] (libgomp_la_SOURCES): Add openacc.f90. Include $(top_srcdir)/plugin/Makefrag.am. (nodist_libsubinclude_HEADERS): Add openacc.h. [USE_FORTRAN] (nodist_finclude_HEADERS): Add openacc_lib.h, openacc.f90, openacc.mod, openacc_kinds.mod. (omp_lib.mod): Generalize into... (%.mod): ... this new rule. 
(openacc_kinds.mod, openacc.mod): New rules. * plugin/configfrag.ac: New file. * configure.ac: Move plugin/offloading support into it. Include it. Instantiate testsuite/libgomp-test-support.pt.exp. * plugin/Makefrag.am: New file. * testsuite/Makefile.am (OFFLOAD_TARGETS) (OFFLOAD_ADDITIONAL_OPTIONS, OFFLOAD_ADDITIONAL_LIB_PATHS): Don't export. (libgomp-test-support.exp): New rule. (all-local): Depend on it. * Makefile.in: Regenerate. * testsuite/Makefile.in: Regenerate. * config.h.in: Likewise. * configure: Likewise. * configure.tgt: Harden shell syntax. * env.c: Include "oacc-int.h". (parse_acc_device_type): New function. (gomp_debug_var, goacc_device_type, goacc_device_num): New variables. (initialize_env): Initialize those. Call goacc_runtime_initialize. * error.c (gomp_vdebug, gomp_debug, gomp_vfatal): New functions. (gomp_fatal): Call gomp_vfatal. * libgomp.h: Include "libgomp-plugin.h" and <stdarg.h>. (gomp_debug_var, goacc_device_type, goacc_device_num, gomp_vdebug) (gomp_debug, gomp_verror, gomp_vfatal, gomp_init_targets_once) (splay_tree_node, splay_tree, splay_tree_key) (struct target_mem_desc, struct splay_tree_key_s) (struct gomp_memory_mapping, struct acc_dispatch_t) (struct gomp_device_descr, gomp_acc_insert_pointer) (gomp_acc_remove_pointer, target_mem_desc, gomp_copy_from_async) (gomp_unmap_vars, gomp_init_device, gomp_init_tables) (gomp_free_memmap, gomp_fini_device): New declarations. (gomp_vdebug, gomp_debug): New macros. Include "splay-tree.h". * libgomp.map (OACC_2.0): New symbol version. 
Use for acc_get_num_devices, acc_get_num_devices_h_, acc_set_device_type, acc_set_device_type_h_, acc_get_device_type, acc_get_device_type_h_, acc_set_device_num, acc_set_device_num_h_, acc_get_device_num, acc_get_device_num_h_, acc_async_test, acc_async_test_h_, acc_async_test_all, acc_async_test_all_h_, acc_wait, acc_wait_h_, acc_wait_async, acc_wait_async_h_, acc_wait_all, acc_wait_all_h_, acc_wait_all_async, acc_wait_all_async_h_, acc_init, acc_init_h_, acc_shutdown, acc_shutdown_h_, acc_on_device, acc_on_device_h_, acc_malloc, acc_free, acc_copyin, acc_copyin_32_h_, acc_copyin_64_h_, acc_copyin_array_h_, acc_present_or_copyin, acc_present_or_copyin_32_h_, acc_present_or_copyin_64_h_, acc_present_or_copyin_array_h_, acc_create, acc_create_32_h_, acc_create_64_h_, acc_create_array_h_, acc_present_or_create, acc_present_or_create_32_h_, acc_present_or_create_64_h_, acc_present_or_create_array_h_, acc_copyout, acc_copyout_32_h_, acc_copyout_64_h_, acc_copyout_array_h_, acc_delete, acc_delete_32_h_, acc_delete_64_h_, acc_delete_array_h_, acc_update_device, acc_update_device_32_h_, acc_update_device_64_h_, acc_update_device_array_h_, acc_update_self, acc_update_self_32_h_, acc_update_self_64_h_, acc_update_self_array_h_, acc_map_data, acc_unmap_data, acc_deviceptr, acc_hostptr, acc_is_present, acc_is_present_32_h_, acc_is_present_64_h_, acc_is_present_array_h_, acc_memcpy_to_device, acc_memcpy_from_device, acc_get_current_cuda_device, acc_get_current_cuda_context, acc_get_cuda_stream, acc_set_cuda_stream. (GOACC_2.0): New symbol version. Use for GOACC_data_end, GOACC_data_start, GOACC_enter_exit_data, GOACC_parallel, GOACC_update, GOACC_wait, GOACC_get_thread_num, GOACC_get_num_threads. (GOMP_PLUGIN_1.0): New symbol version. Use for GOMP_PLUGIN_malloc, GOMP_PLUGIN_malloc_cleared, GOMP_PLUGIN_realloc, GOMP_PLUGIN_debug, GOMP_PLUGIN_error, GOMP_PLUGIN_fatal, GOMP_PLUGIN_async_unmap_vars, GOMP_PLUGIN_acc_thread. 
* libgomp.texi: Update for OpenACC changes, and GOMP_DEBUG environment variable. * libgomp_g.h (GOACC_data_start, GOACC_data_end) (GOACC_enter_exit_data, GOACC_parallel, GOACC_update, GOACC_wait) (GOACC_get_num_threads, GOACC_get_thread_num): New declarations. * splay-tree.h (splay_tree_lookup, splay_tree_insert) (splay_tree_remove): New declarations. (rotate_left, rotate_right, splay_tree_splay, splay_tree_insert) (splay_tree_remove, splay_tree_lookup): Move into... * splay-tree.c: ... this new file. * target.c: Include "oacc-plugin.h", "oacc-int.h", <assert.h>. (splay_tree_node, splay_tree, splay_tree_key) (struct target_mem_desc, struct splay_tree_key_s) (struct gomp_device_descr): Don't declare. (num_devices_openmp): New variable. (gomp_get_num_devices ): Use it. (gomp_init_targets_once): New function. (gomp_get_num_devices ): Use it. (get_kind, gomp_copy_from_async, gomp_free_memmap) (gomp_fini_device, gomp_register_image_for_device): New functions. (gomp_map_vars): Add devaddrs parameter. (gomp_update): Add mm parameter. (gomp_init_device): Move most of it into... (gomp_init_tables): ... this new function. (gomp_register_images_for_device): Remove function. (splay_compare, gomp_map_vars, gomp_unmap_vars, gomp_init_device): Make them hidden instead of static. (gomp_map_vars_existing, gomp_map_vars, gomp_unmap_vars) (gomp_update, gomp_init_device, GOMP_target, GOMP_target_data) (GOMP_target_end_data, GOMP_target_update) (gomp_load_plugin_for_device, gomp_target_init): Update for OpenACC changes. * oacc-async.c: New file. * oacc-cuda.c: Likewise. * oacc-host.c: Likewise. * oacc-init.c: Likewise. * oacc-int.h: Likewise. * oacc-mem.c: Likewise. * oacc-parallel.c: Likewise. * oacc-plugin.c: Likewise. * oacc-plugin.h: Likewise. * oacc-ptx.h: Likewise. * openacc.f90: Likewise. * openacc.h: Likewise. * openacc_lib.h: Likewise. * plugin/plugin-host.c: Likewise. * plugin/plugin-nvptx.c: Likewise. * libgomp-plugin.c: Likewise. * libgomp-plugin.h: Likewise. 
* libgomp_target.h: Remove file after merging content into the former file. Update all users. * testsuite/lib/libgomp.exp: Load libgomp-test-support.exp. (offload_targets_s, offload_targets_s_openacc): New variables. (check_effective_target_openacc_nvidia_accel_present) (check_effective_target_openacc_nvidia_accel_selected): New procedures. (libgomp_init): Update for OpenACC changes. * testsuite/libgomp-test-support.exp.in: New file. * testsuite/libgomp.oacc-c++/c++.exp: Likewise. * testsuite/libgomp.oacc-c/c.exp: Likewise. * testsuite/libgomp.oacc-fortran/fortran.exp: Likewise. * testsuite/libgomp.oacc-c-c++-common/abort-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/abort-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/abort-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/abort-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/cache-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/clauses-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/clauses-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/collapse-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/collapse-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/collapse-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/collapse-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/context-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/context-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/context-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/context-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/data-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/data-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/data-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/data-already-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/data-already-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/data-already-3.c: Likewise. 
* testsuite/libgomp.oacc-c-c++-common/data-already-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/data-already-5.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/data-already-6.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/data-already-7.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/data-already-8.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/if-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/kernels-empty.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-10.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-11.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-12.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-13.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-14.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-15.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-16.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-17.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-18.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-19.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-20.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-21.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-22.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-23.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-24.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-25.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-26.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-27.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-28.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-29.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-30.c: Likewise. 
* testsuite/libgomp.oacc-c-c++-common/lib-31.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-32.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-33.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-34.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-35.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-36.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-37.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-38.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-39.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-40.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-41.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-42.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-43.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-44.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-45.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-46.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-47.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-48.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-49.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-5.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-50.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-51.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-52.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-53.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-54.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-55.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-56.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-57.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-58.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-59.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-6.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-60.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-61.c: Likewise. 
* testsuite/libgomp.oacc-c-c++-common/lib-62.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-63.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-64.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-65.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-66.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-67.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-68.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-69.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-7.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-70.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-71.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-72.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-73.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-74.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-75.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-76.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-77.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-78.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-80.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-81.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-82.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-83.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-84.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-85.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-86.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-87.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-88.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-89.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-9.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-90.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-91.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-92.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/nested-1.c: Likewise. 
* testsuite/libgomp.oacc-c-c++-common/nested-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/offset-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-empty.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/pointer-align-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/present-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/present-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-initial-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/subr.h: Likewise. * testsuite/libgomp.oacc-c-c++-common/subr.ptx: Likewise. * testsuite/libgomp.oacc-c-c++-common/timer.h: Likewise. * testsuite/libgomp.oacc-c-c++-common/update-1-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/update-1.c: Likewise. * testsuite/libgomp.oacc-fortran/abort-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/abort-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f: Likewise. * testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f: Likewise. * testsuite/libgomp.oacc-fortran/asyncwait-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/asyncwait-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/asyncwait-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/collapse-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/collapse-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/collapse-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/collapse-4.f90: Likewise. * testsuite/libgomp.oacc-fortran/collapse-5.f90: Likewise. * testsuite/libgomp.oacc-fortran/collapse-6.f90: Likewise. 
* testsuite/libgomp.oacc-fortran/collapse-7.f90: Likewise. * testsuite/libgomp.oacc-fortran/collapse-8.f90: Likewise. * testsuite/libgomp.oacc-fortran/data-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/data-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/data-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/data-4-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/data-4.f90: Likewise. * testsuite/libgomp.oacc-fortran/data-already-1.f: Likewise. * testsuite/libgomp.oacc-fortran/data-already-2.f: Likewise. * testsuite/libgomp.oacc-fortran/data-already-3.f: Likewise. * testsuite/libgomp.oacc-fortran/data-already-4.f: Likewise. * testsuite/libgomp.oacc-fortran/data-already-5.f: Likewise. * testsuite/libgomp.oacc-fortran/data-already-6.f: Likewise. * testsuite/libgomp.oacc-fortran/data-already-7.f: Likewise. * testsuite/libgomp.oacc-fortran/data-already-8.f: Likewise. * testsuite/libgomp.oacc-fortran/lib-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/lib-10.f90: Likewise. * testsuite/libgomp.oacc-fortran/lib-2.f: Likewise. * testsuite/libgomp.oacc-fortran/lib-3.f: Likewise. * testsuite/libgomp.oacc-fortran/lib-4.f90: Likewise. * testsuite/libgomp.oacc-fortran/lib-5.f90: Likewise. * testsuite/libgomp.oacc-fortran/lib-6.f90: Likewise. * testsuite/libgomp.oacc-fortran/lib-7.f90: Likewise. * testsuite/libgomp.oacc-fortran/lib-8.f90: Likewise. * testsuite/libgomp.oacc-fortran/map-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/openacc_version-1.f: Likewise. * testsuite/libgomp.oacc-fortran/openacc_version-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/pointer-align-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/pset-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-4.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise. 
* testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise. * testsuite/libgomp.oacc-fortran/routine-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/routine-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/routine-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/routine-4.f90: Likewise. * testsuite/libgomp.oacc-fortran/subarrays-1.f90: Likewise. * testsuite/libgomp.oacc-fortran/subarrays-2.f90: Likewise. liboffloadmic/ * plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_get_name) (GOMP_OFFLOAD_get_caps, GOMP_OFFLOAD_fini_device): New functions. Co-Authored-By: Bernd Schmidt <bernds@codesourcery.com> Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com> Co-Authored-By: Dmitry Bocharnikov <dmitry.b@samsung.com> Co-Authored-By: Evgeny Gavrin <e.gavrin@samsung.com> Co-Authored-By: Ilmir Usmanov <i.usmanov@samsung.com> Co-Authored-By: Jakub Jelinek <jakub@redhat.com> Co-Authored-By: James Norris <jnorris@codesourcery.com> Co-Authored-By: Julian Brown <julian@codesourcery.com> Co-Authored-By: Nathan Sidwell <nathan@codesourcery.com> Co-Authored-By: Tobias Burnus <burnus@net-b.de> Co-Authored-By: Tom de Vries <tom@codesourcery.com> From-SVN: r219682
Diffstat (limited to 'libgomp')
-rw-r--r-- libgomp/ChangeLog 351
-rw-r--r-- libgomp/Makefile.am 24
-rw-r--r-- libgomp/Makefile.in 171
-rw-r--r-- libgomp/config.h.in 9
-rwxr-xr-x libgomp/configure 249
-rw-r--r-- libgomp/configure.ac 42
-rw-r--r-- libgomp/configure.tgt 2
-rw-r--r-- libgomp/env.c 24
-rw-r--r-- libgomp/error.c 32
-rw-r--r-- libgomp/libgomp-plugin.c 80
-rw-r--r-- libgomp/libgomp-plugin.h 80
-rw-r--r-- libgomp/libgomp.h 217
-rw-r--r-- libgomp/libgomp.map 104
-rw-r--r-- libgomp/libgomp.texi 40
-rw-r--r-- libgomp/libgomp_g.h 16
-rw-r--r-- libgomp/oacc-async.c 77
-rw-r--r-- libgomp/oacc-cuda.c 84
-rw-r--r-- libgomp/oacc-host.c 100
-rw-r--r-- libgomp/oacc-init.c 636
-rw-r--r-- libgomp/oacc-int.h 105
-rw-r--r-- libgomp/oacc-mem.c 585
-rw-r--r-- libgomp/oacc-parallel.c 490
-rw-r--r-- libgomp/oacc-plugin.c 48
-rw-r--r-- libgomp/oacc-plugin.h (renamed from libgomp/libgomp_target.h) 30
-rw-r--r-- libgomp/oacc-ptx.h 202
-rw-r--r-- libgomp/openacc.f90 956
-rw-r--r-- libgomp/openacc.h 118
-rw-r--r-- libgomp/openacc_lib.h 381
-rw-r--r-- libgomp/plugin/Makefrag.am 49
-rw-r--r-- libgomp/plugin/configfrag.ac 148
-rw-r--r-- libgomp/plugin/plugin-host.c 266
-rw-r--r-- libgomp/plugin/plugin-nvptx.c 1791
-rw-r--r-- libgomp/splay-tree.c 217
-rw-r--r-- libgomp/splay-tree.h 180
-rw-r--r-- libgomp/target.c 634
-rw-r--r-- libgomp/testsuite/Makefile.am 17
-rw-r--r-- libgomp/testsuite/Makefile.in 60
-rw-r--r-- libgomp/testsuite/lib/libgomp.exp 77
-rw-r--r-- libgomp/testsuite/libgomp-test-support.exp.in 4
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c++/c++.exp 107
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/abort-1.c 17
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/abort-2.c 17
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/abort-3.c 17
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/abort-4.c 17
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c 75
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c 466
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/cache-1.c 48
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c 623
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-2.c 67
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-1.c 31
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c 37
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c 40
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-4.c 27
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/context-1.c 213
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c 223
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/context-3.c 200
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c 213
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-1.c 188
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c 162
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c 166
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-1.c 19
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-2.c 16
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-3.c 17
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-4.c 17
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-5.c 17
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-6.c 17
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-7.c 17
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-8.c 16
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c 32
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c 613
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-1.c 184
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-empty.c 6
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-1.c 24
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-10.c 58
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-11.c 23
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-12.c 37
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c 60
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c 61
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c 33
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-16.c 29
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-17.c 31
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-18.c 34
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-19.c 60
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-2.c 26
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c 29
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-21.c 29
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-22.c 29
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c 39
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c 55
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-25.c 30
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-26.c 26
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-27.c 26
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-28.c 26
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-29.c 26
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-3.c 15
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-30.c 26
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-31.c 27
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-32.c 38
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-33.c 31
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c 33
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-35.c 26
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-36.c 26
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-37.c 40
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-38.c 64
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-39.c 41
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-4.c 13
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-40.c 42
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-41.c 43
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c 35
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-43.c 45
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c 45
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-45.c 50
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-46.c 42
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-47.c 43
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c 43
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-49.c 48
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-5.c 40
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-50.c 30
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-51.c 41
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c 28
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c 28
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c 28
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-55.c 48
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-56.c 33
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-57.c 28
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-58.c 28
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-59.c 55
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-6.c 39
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-60.c 54
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-61.c 70
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-62.c 49
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-63.c 43
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-64.c 43
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-65.c 43
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-66.c 48
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-67.c 43
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-68.c 43
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c 124
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-7.c 18
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c 136
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c 119
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c 121
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c 134
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c 139
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c 141
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c 147
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c 135
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c 140
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c 167
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c 132
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c 211
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c 144
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c 58
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c 66
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c 52
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-86.c 42
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-87.c 42
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-88.c 111
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-89.c 118
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-9.c 70
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-90.c 137
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-91.c 84
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-92.c 112
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/nested-1.c 680
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/nested-2.c 141
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/offset-1.c 97
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-1.c 206
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-empty.c 6
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/pointer-align-1.c 35
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/present-1.c 48
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/present-2.c 48
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c 174
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c 126
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c 126
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c 129
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c 32
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-initial-1.c 25
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h 46
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx 148
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h 103
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/update-1-2.c 282
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/update-1.c 280
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c/c.exp 71
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 10
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 13
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 52
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f 52
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f 52
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90 135
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90 40
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90 42
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/collapse-1.f90 27
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/collapse-2.f90 25
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/collapse-3.f90 28
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/collapse-4.f90 40
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/collapse-5.f90 48
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/collapse-6.f90 50
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/collapse-7.f90 40
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/collapse-8.f90 47
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/data-1.f90 45
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 31
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 131
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/data-4-2.f90 138
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/data-4.f90 136
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/data-already-1.f 17
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/data-already-2.f 16
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/data-already-3.f 15
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/data-already-4.f 14
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/data-already-5.f 14
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/data-already-6.f 14
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/data-already-7.f 14
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/data-already-8.f 16
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/fortran.exp 98
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 13
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 82
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/lib-2.f 13
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/lib-3.f 13
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 35
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 31
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 35
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 31
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 83
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/map-1.f90 97
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f 9
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 9
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90 21
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90 229
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 225
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 170
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 170
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 54
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 32
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 30
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/routine-1.f90 32
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/routine-2.f90 29
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/routine-3.f90 27
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/routine-4.f90 23
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90 97
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90 100
239 files changed, 23335 insertions, 548 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 6e1e141..9b003cb 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,354 @@
+2015-01-15 Thomas Schwinge <thomas@codesourcery.com>
+ James Norris <jnorris@codesourcery.com>
+ Tom de Vries <tom@codesourcery.com>
+ Julian Brown <julian@codesourcery.com>
+ Cesar Philippidis <cesar@codesourcery.com>
+ Nathan Sidwell <nathan@codesourcery.com>
+ Tobias Burnus <burnus@net-b.de>
+
+ * Makefile.am (search_path): Add $(top_srcdir)/../include.
+ (libgomp_la_SOURCES): Add splay-tree.c, libgomp-plugin.c,
+ oacc-parallel.c, oacc-host.c, oacc-init.c, oacc-mem.c,
+ oacc-async.c, oacc-plugin.c, oacc-cuda.c.
+ [USE_FORTRAN] (libgomp_la_SOURCES): Add openacc.f90.
+ Include $(top_srcdir)/plugin/Makefrag.am.
+ (nodist_libsubinclude_HEADERS): Add openacc.h.
+ [USE_FORTRAN] (nodist_finclude_HEADERS): Add openacc_lib.h,
+ openacc.f90, openacc.mod, openacc_kinds.mod.
+ (omp_lib.mod): Generalize into...
+ (%.mod): ... this new rule.
+ (openacc_kinds.mod, openacc.mod): New rules.
+ * plugin/configfrag.ac: New file.
+ * configure.ac: Move plugin/offloading support into it. Include
+ it. Instantiate testsuite/libgomp-test-support.pt.exp.
+ * plugin/Makefrag.am: New file.
+ * testsuite/Makefile.am (OFFLOAD_TARGETS)
+ (OFFLOAD_ADDITIONAL_OPTIONS, OFFLOAD_ADDITIONAL_LIB_PATHS): Don't
+ export.
+ (libgomp-test-support.exp): New rule.
+ (all-local): Depend on it.
+ * Makefile.in: Regenerate.
+ * testsuite/Makefile.in: Regenerate.
+ * config.h.in: Likewise.
+ * configure: Likewise.
+ * configure.tgt: Harden shell syntax.
+ * env.c: Include "oacc-int.h".
+ (parse_acc_device_type): New function.
+ (gomp_debug_var, goacc_device_type, goacc_device_num): New
+ variables.
+ (initialize_env): Initialize those. Call
+ goacc_runtime_initialize.
+ * error.c (gomp_vdebug, gomp_debug, gomp_vfatal): New functions.
+ (gomp_fatal): Call gomp_vfatal.
+ * libgomp.h: Include "libgomp-plugin.h" and <stdarg.h>.
+ (gomp_debug_var, goacc_device_type, goacc_device_num, gomp_vdebug)
+ (gomp_debug, gomp_verror, gomp_vfatal, gomp_init_targets_once)
+ (splay_tree_node, splay_tree, splay_tree_key)
+ (struct target_mem_desc, struct splay_tree_key_s)
+ (struct gomp_memory_mapping, struct acc_dispatch_t)
+ (struct gomp_device_descr, gomp_acc_insert_pointer)
+ (gomp_acc_remove_pointer, target_mem_desc, gomp_copy_from_async)
+ (gomp_unmap_vars, gomp_init_device, gomp_init_tables)
+ (gomp_free_memmap, gomp_fini_device): New declarations.
+ (gomp_vdebug, gomp_debug): New macros.
+ Include "splay-tree.h".
+ * libgomp.map (OACC_2.0): New symbol version. Use for
+ acc_get_num_devices, acc_get_num_devices_h_, acc_set_device_type,
+ acc_set_device_type_h_, acc_get_device_type,
+ acc_get_device_type_h_, acc_set_device_num, acc_set_device_num_h_,
+ acc_get_device_num, acc_get_device_num_h_, acc_async_test,
+ acc_async_test_h_, acc_async_test_all, acc_async_test_all_h_,
+ acc_wait, acc_wait_h_, acc_wait_async, acc_wait_async_h_,
+ acc_wait_all, acc_wait_all_h_, acc_wait_all_async,
+ acc_wait_all_async_h_, acc_init, acc_init_h_, acc_shutdown,
+ acc_shutdown_h_, acc_on_device, acc_on_device_h_, acc_malloc,
+ acc_free, acc_copyin, acc_copyin_32_h_, acc_copyin_64_h_,
+ acc_copyin_array_h_, acc_present_or_copyin,
+ acc_present_or_copyin_32_h_, acc_present_or_copyin_64_h_,
+ acc_present_or_copyin_array_h_, acc_create, acc_create_32_h_,
+ acc_create_64_h_, acc_create_array_h_, acc_present_or_create,
+ acc_present_or_create_32_h_, acc_present_or_create_64_h_,
+ acc_present_or_create_array_h_, acc_copyout, acc_copyout_32_h_,
+ acc_copyout_64_h_, acc_copyout_array_h_, acc_delete,
+ acc_delete_32_h_, acc_delete_64_h_, acc_delete_array_h_,
+ acc_update_device, acc_update_device_32_h_,
+ acc_update_device_64_h_, acc_update_device_array_h_,
+ acc_update_self, acc_update_self_32_h_, acc_update_self_64_h_,
+ acc_update_self_array_h_, acc_map_data, acc_unmap_data,
+ acc_deviceptr, acc_hostptr, acc_is_present, acc_is_present_32_h_,
+ acc_is_present_64_h_, acc_is_present_array_h_,
+ acc_memcpy_to_device, acc_memcpy_from_device,
+ acc_get_current_cuda_device, acc_get_current_cuda_context,
+ acc_get_cuda_stream, acc_set_cuda_stream.
+ (GOACC_2.0): New symbol version. Use for GOACC_data_end,
+ GOACC_data_start, GOACC_enter_exit_data, GOACC_parallel,
+ GOACC_update, GOACC_wait, GOACC_get_thread_num,
+ GOACC_get_num_threads.
+ (GOMP_PLUGIN_1.0): New symbol version. Use for
+ GOMP_PLUGIN_malloc, GOMP_PLUGIN_malloc_cleared,
+ GOMP_PLUGIN_realloc, GOMP_PLUGIN_debug, GOMP_PLUGIN_error,
+ GOMP_PLUGIN_fatal, GOMP_PLUGIN_async_unmap_vars,
+ GOMP_PLUGIN_acc_thread.
+ * libgomp.texi: Update for OpenACC changes, and GOMP_DEBUG
+ environment variable.
+ * libgomp_g.h (GOACC_data_start, GOACC_data_end)
+ (GOACC_enter_exit_data, GOACC_parallel, GOACC_update, GOACC_wait)
+ (GOACC_get_num_threads, GOACC_get_thread_num): New declarations.
+ * splay-tree.h (splay_tree_lookup, splay_tree_insert)
+ (splay_tree_remove): New declarations.
+ (rotate_left, rotate_right, splay_tree_splay, splay_tree_insert)
+ (splay_tree_remove, splay_tree_lookup): Move into...
+ * splay-tree.c: ... this new file.
+ * target.c: Include "oacc-plugin.h", "oacc-int.h", <assert.h>.
+ (splay_tree_node, splay_tree, splay_tree_key)
+ (struct target_mem_desc, struct splay_tree_key_s)
+ (struct gomp_device_descr): Don't declare.
+ (num_devices_openmp): New variable.
+ (gomp_get_num_devices ): Use it.
+ (gomp_init_targets_once): New function.
+ (gomp_get_num_devices ): Use it.
+ (get_kind, gomp_copy_from_async, gomp_free_memmap)
+ (gomp_fini_device, gomp_register_image_for_device): New functions.
+ (gomp_map_vars): Add devaddrs parameter.
+ (gomp_update): Add mm parameter.
+ (gomp_init_device): Move most of it into...
+ (gomp_init_tables): ... this new function.
+ (gomp_register_images_for_device): Remove function.
+ (splay_compare, gomp_map_vars, gomp_unmap_vars, gomp_init_device):
+ Make them hidden instead of static.
+ (gomp_map_vars_existing, gomp_map_vars, gomp_unmap_vars)
+ (gomp_update, gomp_init_device, GOMP_target, GOMP_target_data)
+ (GOMP_target_end_data, GOMP_target_update)
+ (gomp_load_plugin_for_device, gomp_target_init): Update for
+ OpenACC changes.
+ * oacc-async.c: New file.
+ * oacc-cuda.c: Likewise.
+ * oacc-host.c: Likewise.
+ * oacc-init.c: Likewise.
+ * oacc-int.h: Likewise.
+ * oacc-mem.c: Likewise.
+ * oacc-parallel.c: Likewise.
+ * oacc-plugin.c: Likewise.
+ * oacc-plugin.h: Likewise.
+ * oacc-ptx.h: Likewise.
+ * openacc.f90: Likewise.
+ * openacc.h: Likewise.
+ * openacc_lib.h: Likewise.
+ * plugin/plugin-host.c: Likewise.
+ * plugin/plugin-nvptx.c: Likewise.
+ * libgomp-plugin.c: Likewise.
+ * libgomp-plugin.h: Likewise.
+ * libgomp_target.h: Remove file after merging content into the
+ former file. Update all users.
+ * testsuite/lib/libgomp.exp: Load libgomp-test-support.exp.
+ (offload_targets_s, offload_targets_s_openacc): New variables.
+ (check_effective_target_openacc_nvidia_accel_present)
+ (check_effective_target_openacc_nvidia_accel_selected): New
+ procedures.
+ (libgomp_init): Update for OpenACC changes.
+ * testsuite/libgomp-test-support.exp.in: New file.
+ * testsuite/libgomp.oacc-c++/c++.exp: Likewise.
+ * testsuite/libgomp.oacc-c/c.exp: Likewise.
+ * testsuite/libgomp.oacc-fortran/fortran.exp: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/abort-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/abort-2.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/abort-3.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/abort-4.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/cache-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/clauses-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/clauses-2.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/collapse-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/collapse-2.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/collapse-3.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/collapse-4.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/context-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/context-2.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/context-3.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/context-4.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/data-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/data-2.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/data-3.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/data-already-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/data-already-2.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/data-already-3.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/data-already-4.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/data-already-5.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/data-already-6.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/data-already-7.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/data-already-8.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/if-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/kernels-empty.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-10.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-11.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-12.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-13.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-14.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-15.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-16.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-17.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-18.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-19.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-2.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-20.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-21.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-22.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-23.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-24.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-25.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-26.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-27.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-28.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-29.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-3.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-30.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-31.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-32.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-33.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-34.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-35.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-36.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-37.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-38.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-39.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-4.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-40.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-41.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-42.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-43.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-44.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-45.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-46.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-47.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-48.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-49.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-5.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-50.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-51.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-52.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-53.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-54.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-55.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-56.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-57.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-58.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-59.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-6.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-60.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-61.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-62.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-63.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-64.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-65.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-66.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-67.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-68.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-69.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-7.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-70.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-71.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-72.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-73.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-74.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-75.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-76.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-77.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-78.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-80.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-81.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-82.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-83.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-84.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-85.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-86.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-87.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-88.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-89.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-9.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-90.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-91.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-92.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/nested-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/nested-2.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/offset-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/parallel-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/parallel-empty.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/pointer-align-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/present-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/present-2.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/reduction-initial-1.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/subr.h: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/subr.ptx: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/timer.h: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/update-1-2.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/update-1.c: Likewise.
+ * testsuite/libgomp.oacc-fortran/abort-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/abort-2.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/asyncwait-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/asyncwait-2.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/asyncwait-3.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/collapse-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/collapse-2.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/collapse-3.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/collapse-4.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/collapse-5.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/collapse-6.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/collapse-7.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/collapse-8.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/data-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/data-2.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/data-3.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/data-4-2.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/data-4.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/data-already-1.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/data-already-2.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/data-already-3.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/data-already-4.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/data-already-5.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/data-already-6.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/data-already-7.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/data-already-8.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/lib-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/lib-10.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/lib-2.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/lib-3.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/lib-4.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/lib-5.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/lib-6.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/lib-7.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/lib-8.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/map-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/openacc_version-1.f: Likewise.
+ * testsuite/libgomp.oacc-fortran/openacc_version-2.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/pointer-align-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/pset-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/reduction-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/reduction-2.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/reduction-3.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/reduction-4.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/routine-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/routine-2.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/routine-3.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/routine-4.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/subarrays-1.f90: Likewise.
+ * testsuite/libgomp.oacc-fortran/subarrays-2.f90: Likewise.
+
2015-01-10 Thomas Schwinge <thomas@codesourcery.com>
Julian Brown <julian@codesourcery.com>
David Malcolm <dmalcolm@redhat.com>
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index 427415e..5411278 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -7,7 +7,8 @@ SUBDIRS = testsuite
gcc_version := $(shell cat $(top_srcdir)/../gcc/BASE-VER)
config_path = @config_path@
-search_path = $(addprefix $(top_srcdir)/config/, $(config_path)) $(top_srcdir)
+search_path = $(addprefix $(top_srcdir)/config/, $(config_path)) $(top_srcdir) \
+ $(top_srcdir)/../include
fincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/finclude
libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
@@ -60,12 +61,21 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS)
libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
- time.c fortran.c affinity.c target.c
+ time.c fortran.c affinity.c target.c splay-tree.c libgomp-plugin.c \
+ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c oacc-async.c \
+ oacc-plugin.c oacc-cuda.c
+
+include $(top_srcdir)/plugin/Makefrag.am
+
+if USE_FORTRAN
+libgomp_la_SOURCES += openacc.f90
+endif
nodist_noinst_HEADERS = libgomp_f.h
-nodist_libsubinclude_HEADERS = omp.h
+nodist_libsubinclude_HEADERS = omp.h openacc.h
if USE_FORTRAN
-nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod
+nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
+ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod
endif
LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS))
@@ -75,7 +85,11 @@ LINK = $(LIBTOOL) --tag CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
omp_lib_kinds.mod: omp_lib.mod
:
-omp_lib.mod: omp_lib.f90
+openacc_kinds.mod: openacc.mod
+ :
+openacc.mod: openacc.lo
+ :
+%.mod: %.f90
$(FC) $(FCFLAGS) -fsyntax-only $<
fortran.lo: libgomp_f.h
fortran.o: libgomp_f.h
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 8e4774f..b61b108 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -15,6 +15,34 @@
@SET_MAKE@
+# Plugins for offload execution, Makefile.am fragment.
+#
+# Copyright (C) 2014-2015 Free Software Foundation, Inc.
+#
+# Contributed by Mentor Embedded.
+#
+# This file is part of the GNU Offloading and Multi Processing Library
+# (libgomp).
+#
+# Libgomp is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
VPATH = @srcdir@
pkgdatadir = $(datadir)/@PACKAGE@
@@ -36,13 +64,16 @@ POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
-subdir = .
-DIST_COMMON = ChangeLog $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
+DIST_COMMON = $(top_srcdir)/plugin/Makefrag.am ChangeLog \
+ $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
$(top_srcdir)/configure $(am__configure_deps) \
$(srcdir)/config.h.in $(srcdir)/../mkinstalldirs \
$(srcdir)/omp.h.in $(srcdir)/omp_lib.h.in \
$(srcdir)/omp_lib.f90.in $(srcdir)/libgomp_f.h.in \
$(srcdir)/libgomp.spec.in $(srcdir)/../depcomp
+@PLUGIN_NVPTX_TRUE@am__append_1 = libgomp-plugin-nvptx.la
+@USE_FORTRAN_TRUE@am__append_2 = openacc.f90
+subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
$(top_srcdir)/../config/depstand.m4 \
@@ -56,7 +87,8 @@ am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
$(top_srcdir)/../config/tls.m4 $(top_srcdir)/../ltoptions.m4 \
$(top_srcdir)/../ltsugar.m4 $(top_srcdir)/../ltversion.m4 \
$(top_srcdir)/../lt~obsolete.m4 $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/../libtool.m4 $(top_srcdir)/configure.ac
+ $(top_srcdir)/../libtool.m4 $(top_srcdir)/plugin/configfrag.ac \
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
@@ -91,12 +123,38 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(infodir)" \
"$(DESTDIR)$(fincludedir)" "$(DESTDIR)$(libsubincludedir)" \
"$(DESTDIR)$(toolexeclibdir)"
LTLIBRARIES = $(toolexeclib_LTLIBRARIES)
+libgomp_plugin_host_nonshm_la_DEPENDENCIES = libgomp.la
+am_libgomp_plugin_host_nonshm_la_OBJECTS = \
+ libgomp_plugin_host_nonshm_la-plugin-host.lo
+libgomp_plugin_host_nonshm_la_OBJECTS = \
+ $(am_libgomp_plugin_host_nonshm_la_OBJECTS)
+libgomp_plugin_host_nonshm_la_LINK = $(LIBTOOL) --tag=CC \
+ $(libgomp_plugin_host_nonshm_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(libgomp_plugin_host_nonshm_la_LDFLAGS) $(LDFLAGS) -o $@
+am__DEPENDENCIES_1 =
+@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_DEPENDENCIES = libgomp.la \
+@PLUGIN_NVPTX_TRUE@ $(am__DEPENDENCIES_1)
+@PLUGIN_NVPTX_TRUE@am_libgomp_plugin_nvptx_la_OBJECTS = \
+@PLUGIN_NVPTX_TRUE@ libgomp_plugin_nvptx_la-plugin-nvptx.lo
+libgomp_plugin_nvptx_la_OBJECTS = \
+ $(am_libgomp_plugin_nvptx_la_OBJECTS)
+libgomp_plugin_nvptx_la_LINK = $(LIBTOOL) --tag=CC \
+ $(libgomp_plugin_nvptx_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(libgomp_plugin_nvptx_la_LDFLAGS) $(LDFLAGS) -o $@
+@PLUGIN_NVPTX_TRUE@am_libgomp_plugin_nvptx_la_rpath = -rpath \
+@PLUGIN_NVPTX_TRUE@ $(toolexeclibdir)
libgomp_la_LIBADD =
+@USE_FORTRAN_TRUE@am__objects_1 = openacc.lo
am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \
error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \
parallel.lo sections.lo single.lo task.lo team.lo work.lo \
lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \
- fortran.lo affinity.lo target.lo
+ fortran.lo affinity.lo target.lo splay-tree.lo \
+ libgomp-plugin.lo oacc-parallel.lo oacc-host.lo oacc-init.lo \
+ oacc-mem.lo oacc-async.lo oacc-plugin.lo oacc-cuda.lo \
+ $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/../depcomp
@@ -108,7 +166,15 @@ LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
CCLD = $(CC)
-SOURCES = $(libgomp_la_SOURCES)
+FCCOMPILE = $(FC) $(AM_FCFLAGS) $(FCFLAGS)
+LTFCCOMPILE = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(FC) $(AM_FCFLAGS) $(FCFLAGS)
+FCLD = $(FC)
+FCLINK = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(libgomp_plugin_host_nonshm_la_SOURCES) \
+ $(libgomp_plugin_nvptx_la_SOURCES) $(libgomp_la_SOURCES)
MULTISRCTOP =
MULTIBUILDTOP =
MULTIDIRS =
@@ -155,6 +221,8 @@ CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
+CUDA_DRIVER_INCLUDE = @CUDA_DRIVER_INCLUDE@
+CUDA_DRIVER_LIB = @CUDA_DRIVER_LIB@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
@@ -213,6 +281,10 @@ PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
+PLUGIN_NVPTX = @PLUGIN_NVPTX@
+PLUGIN_NVPTX_CPPFLAGS = @PLUGIN_NVPTX_CPPFLAGS@
+PLUGIN_NVPTX_LDFLAGS = @PLUGIN_NVPTX_LDFLAGS@
+PLUGIN_NVPTX_LIBS = @PLUGIN_NVPTX_LIBS@
RANLIB = @RANLIB@
SECTION_LDFLAGS = @SECTION_LDFLAGS@
SED = @SED@
@@ -293,13 +365,16 @@ top_srcdir = @top_srcdir@
ACLOCAL_AMFLAGS = -I .. -I ../config
SUBDIRS = testsuite
gcc_version := $(shell cat $(top_srcdir)/../gcc/BASE-VER)
-search_path = $(addprefix $(top_srcdir)/config/, $(config_path)) $(top_srcdir)
+search_path = $(addprefix $(top_srcdir)/config/, $(config_path)) $(top_srcdir) \
+ $(top_srcdir)/../include
+
fincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/finclude
libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
AM_CPPFLAGS = $(addprefix -I, $(search_path))
AM_CFLAGS = $(XCFLAGS)
AM_LDFLAGS = $(XLDFLAGS) $(SECTION_LDFLAGS) $(OPT_LDFLAGS)
-toolexeclib_LTLIBRARIES = libgomp.la
+toolexeclib_LTLIBRARIES = libgomp.la $(am__append_1) \
+ libgomp-plugin-host_nonshm.la
nodist_toolexeclib_HEADERS = libgomp.spec
# -Wc is only a libtool option.
@@ -318,13 +393,35 @@ libgomp_la_LDFLAGS = $(libgomp_version_info) $(libgomp_version_script) \
libgomp_la_DEPENDENCIES = $(libgomp_version_dep)
libgomp_la_LINK = $(LINK) $(libgomp_la_LDFLAGS)
libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
- iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
- task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
- time.c fortran.c affinity.c target.c
-
+ iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c \
+ single.c task.c team.c work.c lock.c mutex.c proc.c sem.c \
+ bar.c ptrlock.c time.c fortran.c affinity.c target.c \
+ splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
+ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
+ $(am__append_2)
+
+# Nvidia PTX OpenACC plugin.
+@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
+@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_SOURCES = plugin/plugin-nvptx.c
+@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_CPPFLAGS = $(AM_CPPFLAGS) $(PLUGIN_NVPTX_CPPFLAGS)
+@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_LDFLAGS = \
+@PLUGIN_NVPTX_TRUE@ $(libgomp_plugin_nvptx_version_info) \
+@PLUGIN_NVPTX_TRUE@ $(lt_host_flags) $(PLUGIN_NVPTX_LDFLAGS)
+@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_LIBADD = libgomp.la $(PLUGIN_NVPTX_LIBS)
+@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_LIBTOOLFLAGS = --tag=disable-static
+libgomp_plugin_host_nonshm_version_info = -version-info $(libtool_VERSION)
+libgomp_plugin_host_nonshm_la_SOURCES = plugin/plugin-host.c
+libgomp_plugin_host_nonshm_la_CPPFLAGS = $(AM_CPPFLAGS) -DHOST_NONSHM_PLUGIN
+libgomp_plugin_host_nonshm_la_LDFLAGS = \
+ $(libgomp_plugin_host_nonshm_version_info) $(lt_host_flags)
+
+libgomp_plugin_host_nonshm_la_LIBADD = libgomp.la
+libgomp_plugin_host_nonshm_la_LIBTOOLFLAGS = --tag=disable-static
nodist_noinst_HEADERS = libgomp_f.h
-nodist_libsubinclude_HEADERS = omp.h
-@USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod
+nodist_libsubinclude_HEADERS = omp.h openacc.h
+@USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
+@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod
+
LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS))
LINK = $(LIBTOOL) --tag CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \
$(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LTLDFLAGS) -o $@
@@ -354,10 +451,10 @@ all: config.h
$(MAKE) $(AM_MAKEFLAGS) all-recursive
.SUFFIXES:
-.SUFFIXES: .c .dvi .lo .o .obj .ps
+.SUFFIXES: .c .dvi .f90 .lo .o .obj .ps
am--refresh:
@:
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/plugin/Makefrag.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -447,6 +544,10 @@ clean-toolexeclibLTLIBRARIES:
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
+libgomp-plugin-host_nonshm.la: $(libgomp_plugin_host_nonshm_la_OBJECTS) $(libgomp_plugin_host_nonshm_la_DEPENDENCIES)
+ $(libgomp_plugin_host_nonshm_la_LINK) -rpath $(toolexeclibdir) $(libgomp_plugin_host_nonshm_la_OBJECTS) $(libgomp_plugin_host_nonshm_la_LIBADD) $(LIBS)
+libgomp-plugin-nvptx.la: $(libgomp_plugin_nvptx_la_OBJECTS) $(libgomp_plugin_nvptx_la_DEPENDENCIES)
+ $(libgomp_plugin_nvptx_la_LINK) $(am_libgomp_plugin_nvptx_la_rpath) $(libgomp_plugin_nvptx_la_OBJECTS) $(libgomp_plugin_nvptx_la_LIBADD) $(LIBS)
libgomp.la: $(libgomp_la_OBJECTS) $(libgomp_la_DEPENDENCIES)
$(libgomp_la_LINK) -rpath $(toolexeclibdir) $(libgomp_la_OBJECTS) $(libgomp_la_LIBADD) $(LIBS)
@@ -466,10 +567,20 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fortran.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp-plugin.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_host_nonshm_la-plugin-host.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop_ull.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mutex.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-async.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-cuda.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-host.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-init.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc.Plo@am__quote@
@@ -477,6 +588,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@
@@ -504,6 +616,29 @@ distclean-compile:
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
+libgomp_plugin_host_nonshm_la-plugin-host.lo: plugin/plugin-host.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_host_nonshm_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_host_nonshm_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libgomp_plugin_host_nonshm_la-plugin-host.lo -MD -MP -MF $(DEPDIR)/libgomp_plugin_host_nonshm_la-plugin-host.Tpo -c -o libgomp_plugin_host_nonshm_la-plugin-host.lo `test -f 'plugin/plugin-host.c' || echo '$(srcdir)/'`plugin/plugin-host.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libgomp_plugin_host_nonshm_la-plugin-host.Tpo $(DEPDIR)/libgomp_plugin_host_nonshm_la-plugin-host.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='plugin/plugin-host.c' object='libgomp_plugin_host_nonshm_la-plugin-host.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_host_nonshm_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_host_nonshm_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libgomp_plugin_host_nonshm_la-plugin-host.lo `test -f 'plugin/plugin-host.c' || echo '$(srcdir)/'`plugin/plugin-host.c
+
+libgomp_plugin_nvptx_la-plugin-nvptx.lo: plugin/plugin-nvptx.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_nvptx_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_nvptx_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libgomp_plugin_nvptx_la-plugin-nvptx.lo -MD -MP -MF $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Tpo -c -o libgomp_plugin_nvptx_la-plugin-nvptx.lo `test -f 'plugin/plugin-nvptx.c' || echo '$(srcdir)/'`plugin/plugin-nvptx.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Tpo $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='plugin/plugin-nvptx.c' object='libgomp_plugin_nvptx_la-plugin-nvptx.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_nvptx_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_nvptx_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libgomp_plugin_nvptx_la-plugin-nvptx.lo `test -f 'plugin/plugin-nvptx.c' || echo '$(srcdir)/'`plugin/plugin-nvptx.c
+
+.f90.o:
+ $(FCCOMPILE) -c -o $@ $<
+
+.f90.obj:
+ $(FCCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.f90.lo:
+ $(LTFCCOMPILE) -c -o $@ $<
+
mostlyclean-libtool:
-rm -f *.lo
@@ -1088,7 +1223,11 @@ vpath % $(strip $(search_path))
omp_lib_kinds.mod: omp_lib.mod
:
-omp_lib.mod: omp_lib.f90
+openacc_kinds.mod: openacc.mod
+ :
+openacc.mod: openacc.lo
+ :
+%.mod: %.f90
$(FC) $(FCFLAGS) -fsyntax-only $<
fortran.lo: libgomp_f.h
fortran.o: libgomp_f.h
diff --git a/libgomp/config.h.in b/libgomp/config.h.in
index a5e27ca..02547b1 100644
--- a/libgomp/config.h.in
+++ b/libgomp/config.h.in
@@ -79,9 +79,6 @@
/* Define to 1 if the target supports thread-local storage. */
#undef HAVE_TLS
-/* Define to 1 if the target use emutls for thread-local storage. */
-#undef USE_EMUTLS
-
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
@@ -116,6 +113,9 @@
/* Define to the version of this package. */
#undef PACKAGE_VERSION
+/* Define to 1 if the NVIDIA plugin is built, 0 if not. */
+#undef PLUGIN_NVPTX
+
/* Define if all infrastructure, needed for plugins, is supported. */
#undef PLUGIN_SUPPORT
@@ -143,5 +143,8 @@
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
#undef TIME_WITH_SYS_TIME
+/* Define to 1 if the target use emutls for thread-local storage. */
+#undef USE_EMUTLS
+
/* Version number of package */
#undef VERSION
diff --git a/libgomp/configure b/libgomp/configure
index 3214e9d..0818707 100755
--- a/libgomp/configure
+++ b/libgomp/configure
@@ -616,9 +616,6 @@ OMP_LOCK_SIZE
USE_FORTRAN_FALSE
USE_FORTRAN_TRUE
link_gomp
-offload_additional_lib_paths
-offload_additional_options
-offload_targets
XLDFLAGS
XCFLAGS
config_path
@@ -630,6 +627,17 @@ LIBGOMP_BUILD_VERSIONED_SHLIB_FALSE
LIBGOMP_BUILD_VERSIONED_SHLIB_TRUE
OPT_LDFLAGS
SECTION_LDFLAGS
+PLUGIN_NVPTX_FALSE
+PLUGIN_NVPTX_TRUE
+offload_additional_lib_paths
+offload_additional_options
+PLUGIN_NVPTX_LIBS
+PLUGIN_NVPTX_LDFLAGS
+PLUGIN_NVPTX_CPPFLAGS
+PLUGIN_NVPTX
+CUDA_DRIVER_LIB
+CUDA_DRIVER_INCLUDE
+offload_targets
libtool_VERSION
ac_ct_FC
FCFLAGS
@@ -770,6 +778,9 @@ enable_fast_install
with_gnu_ld
enable_libtool_lock
enable_maintainer_mode
+with_cuda_driver
+with_cuda_driver_include
+with_cuda_driver_lib
enable_linux_futex
enable_tls
enable_symvers
@@ -1431,6 +1442,16 @@ Optional Packages:
--with-pic try to use only PIC/non-PIC objects [default=use
both]
--with-gnu-ld assume the C compiler uses GNU ld [default=no]
+ --with-cuda-driver=PATH specify prefix directory for installed CUDA driver
+ package. Equivalent to
+ --with-cuda-driver-include=PATH/include plus
+ --with-cuda-driver-lib=PATH/lib
+ --with-cuda-driver-include=PATH
+ specify directory for installed CUDA driver include
+ files
+ --with-cuda-driver-lib=PATH
+ specify directory for the installed CUDA driver
+ library
Some influential environment variables:
CC C compiler command
@@ -11097,7 +11118,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 11100 "configure"
+#line 11121 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -11203,7 +11224,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 11206 "configure"
+#line 11227 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -15055,6 +15076,36 @@ fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
+# Plugins for offload execution, configure.ac fragment. -*- mode: autoconf -*-
+#
+# Copyright (C) 2014-2015 Free Software Foundation, Inc.
+#
+# Contributed by Mentor Embedded.
+#
+# This file is part of the GNU Offloading and Multi Processing Library
+# (libgomp).
+#
+# Libgomp is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+offload_targets=
+
plugin_support=yes
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlsym in -ldl" >&5
$as_echo_n "checking for dlsym in -ldl... " >&6; }
@@ -15107,8 +15158,152 @@ if test x"$plugin_support" = xyes; then
$as_echo "#define PLUGIN_SUPPORT 1" >>confdefs.h
+ offload_targets=host_nonshm
+elif test "x${enable_offload_targets-no}" != xno; then
+ as_fn_error "Can't support offloading without support for plugins" "$LINENO" 5
+fi
+
+# Look for the CUDA driver package.
+CUDA_DRIVER_INCLUDE=
+CUDA_DRIVER_LIB=
+
+
+CUDA_DRIVER_CPPFLAGS=
+CUDA_DRIVER_LDFLAGS=
+
+# Check whether --with-cuda-driver was given.
+if test "${with_cuda_driver+set}" = set; then :
+ withval=$with_cuda_driver;
fi
+
+# Check whether --with-cuda-driver-include was given.
+if test "${with_cuda_driver_include+set}" = set; then :
+ withval=$with_cuda_driver_include;
+fi
+
+
+# Check whether --with-cuda-driver-lib was given.
+if test "${with_cuda_driver_lib+set}" = set; then :
+ withval=$with_cuda_driver_lib;
+fi
+
+if test "x$with_cuda_driver" != x; then
+ CUDA_DRIVER_INCLUDE=$with_cuda_driver/include
+ CUDA_DRIVER_LIB=$with_cuda_driver/lib
+fi
+if test "x$with_cuda_driver_include" != x; then
+ CUDA_DRIVER_INCLUDE=$with_cuda_driver_include
+fi
+if test "x$with_cuda_driver_lib" != x; then
+ CUDA_DRIVER_LIB=$with_cuda_driver_lib
+fi
+if test "x$CUDA_DRIVER_INCLUDE" != x; then
+ CUDA_DRIVER_CPPFLAGS=-I$CUDA_DRIVER_INCLUDE
+fi
+if test "x$CUDA_DRIVER_LIB" != x; then
+ CUDA_DRIVER_LDFLAGS=-L$CUDA_DRIVER_LIB
+fi
+
+PLUGIN_NVPTX=0
+PLUGIN_NVPTX_CPPFLAGS=
+PLUGIN_NVPTX_LDFLAGS=
+PLUGIN_NVPTX_LIBS=
+
+
+
+
+
+# Get offload targets and path to install tree of offloading compiler.
+offload_additional_options=
+offload_additional_lib_paths=
+
+
+if test x"$enable_offload_targets" != x; then
+ for tgt in `echo $enable_offload_targets | sed -e 's#,# #g'`; do
+ tgt_dir=`echo $tgt | grep '=' | sed 's/.*=//'`
+ tgt=`echo $tgt | sed 's/=.*//'`
+ case $tgt in
+ *-intelmic-* | *-intelmicemul-*)
+ tgt_name=intelmic
+ ;;
+ nvptx*)
+ tgt_name=nvptx
+ PLUGIN_NVPTX=$tgt
+ PLUGIN_NVPTX_CPPFLAGS=$CUDA_DRIVER_CPPFLAGS
+ PLUGIN_NVPTX_LDFLAGS=$CUDA_DRIVER_LDFLAGS
+ PLUGIN_NVPTX_LIBS='-lcuda'
+
+ PLUGIN_NVPTX_save_CPPFLAGS=$CPPFLAGS
+ CPPFLAGS="$PLUGIN_NVPTX_CPPFLAGS $CPPFLAGS"
+ PLUGIN_NVPTX_save_LDFLAGS=$LDFLAGS
+ LDFLAGS="$PLUGIN_NVPTX_LDFLAGS $LDFLAGS"
+ PLUGIN_NVPTX_save_LIBS=$LIBS
+ LIBS="$PLUGIN_NVPTX_LIBS $LIBS"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include "cuda.h"
+int
+main ()
+{
+CUresult r = cuCtxPushCurrent (NULL);
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ PLUGIN_NVPTX=1
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+ CPPFLAGS=$PLUGIN_NVPTX_save_CPPFLAGS
+ LDFLAGS=$PLUGIN_NVPTX_save_LDFLAGS
+ LIBS=$PLUGIN_NVPTX_save_LIBS
+ case $PLUGIN_NVPTX in
+ nvptx*)
+ PLUGIN_NVPTX=0
+ as_fn_error "CUDA driver package required for nvptx support" "$LINENO" 5
+ ;;
+ esac
+ ;;
+ *)
+ as_fn_error "unknown offload target specified" "$LINENO" 5
+ ;;
+ esac
+ if test x"$offload_targets" = x; then
+ offload_targets=$tgt_name
+ else
+ offload_targets=$offload_targets,$tgt_name
+ fi
+ if test x"$tgt_dir" != x; then
+ offload_additional_options="$offload_additional_options -B$tgt_dir/libexec/gcc/\$(target_alias)/\$(gcc_version) -B$tgt_dir/bin"
+ offload_additional_lib_paths="$offload_additional_lib_paths:$tgt_dir/lib64:$tgt_dir/lib:$tgt_dir/lib32"
+ else
+ offload_additional_options="$offload_additional_options -B\$(libexecdir)/gcc/\$(target_alias)/\$(gcc_version) -B\$(bindir)"
+ offload_additional_lib_paths="$offload_additional_lib_paths:$toolexeclibdir"
+ fi
+ done
+fi
+
+cat >>confdefs.h <<_ACEOF
+#define OFFLOAD_TARGETS "$offload_targets"
+_ACEOF
+
+ if test $PLUGIN_NVPTX = 1; then
+ PLUGIN_NVPTX_TRUE=
+ PLUGIN_NVPTX_FALSE='#'
+else
+ PLUGIN_NVPTX_TRUE='#'
+ PLUGIN_NVPTX_FALSE=
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define PLUGIN_NVPTX $PLUGIN_NVPTX
+_ACEOF
+
+
+
# Check for functions needed.
for ac_func in getloadavg clock_gettime strtoull
do :
@@ -16241,43 +16436,6 @@ else
multilib_arg=
fi
-# Get accel target and path to install tree of accel compiler
-offload_additional_options=
-offload_additional_lib_paths=
-offload_targets=
-if test x"$enable_offload_targets" != x; then
- for tgt in `echo $enable_offload_targets | sed -e 's#,# #g'`; do
- tgt_dir=`echo $tgt | grep '=' | sed 's/.*=//'`
- tgt=`echo $tgt | sed 's/=.*//'`
- case $tgt in
- *-intelmic-* | *-intelmicemul-*)
- tgt_name="intelmic" ;;
- *)
- as_fn_error "unknown offload target specified" "$LINENO" 5 ;;
- esac
- if test x"$offload_targets" = x; then
- offload_targets=$tgt_name
- else
- offload_targets=$offload_targets,$tgt_name
- fi
- if test x"$tgt_dir" != x; then
- offload_additional_options="$offload_additional_options -B$tgt_dir/libexec/gcc/\$(target_alias)/\$(gcc_version) -B$tgt_dir/bin"
- offload_additional_lib_paths="$offload_additional_lib_paths:$tgt_dir/lib64:$tgt_dir/lib:$tgt_dir/lib32"
- else
- offload_additional_options="$offload_additional_options -B\$(libexecdir)/gcc/\$(target_alias)/\$(gcc_version) -B\$(bindir)"
- offload_additional_lib_paths="$offload_additional_lib_paths:$toolexeclibdir"
- fi
- done
-fi
-
-cat >>confdefs.h <<_ACEOF
-#define OFFLOAD_TARGETS "$offload_targets"
-_ACEOF
-
-
-
-
-
# Set up the set of libraries that we need to link against for libgomp.
# Note that the GOMP_SELF_SPEC in gcc.c may force -pthread,
# which will force linkage against -lpthread (or equivalent for the system).
@@ -16395,6 +16553,8 @@ ac_config_files="$ac_config_files omp.h omp_lib.h omp_lib.f90 libgomp_f.h"
ac_config_files="$ac_config_files Makefile testsuite/Makefile libgomp.spec"
+ac_config_files="$ac_config_files testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in"
+
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
# tests run on this system so they can be shared between configure
@@ -16520,6 +16680,10 @@ if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then
as_fn_error "conditional \"MAINTAINER_MODE\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
+if test -z "${PLUGIN_NVPTX_TRUE}" && test -z "${PLUGIN_NVPTX_FALSE}"; then
+ as_fn_error "conditional \"PLUGIN_NVPTX\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
if test -z "${LIBGOMP_BUILD_VERSIONED_SHLIB_TRUE}" && test -z "${LIBGOMP_BUILD_VERSIONED_SHLIB_FALSE}"; then
as_fn_error "conditional \"LIBGOMP_BUILD_VERSIONED_SHLIB\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
@@ -17535,6 +17699,7 @@ do
"Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
"testsuite/Makefile") CONFIG_FILES="$CONFIG_FILES testsuite/Makefile" ;;
"libgomp.spec") CONFIG_FILES="$CONFIG_FILES libgomp.spec" ;;
+ "testsuite/libgomp-test-support.pt.exp") CONFIG_FILES="$CONFIG_FILES testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in" ;;
*) as_fn_error "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
esac
diff --git a/libgomp/configure.ac b/libgomp/configure.ac
index 8ed1bae..4687b01 100644
--- a/libgomp/configure.ac
+++ b/libgomp/configure.ac
@@ -193,12 +193,7 @@ AC_LINK_IFELSE(
[],
[AC_MSG_ERROR([Pthreads are required to build libgomp])])])
-plugin_support=yes
-AC_CHECK_LIB(dl, dlsym, , [plugin_support=no])
-if test x"$plugin_support" = xyes; then
- AC_DEFINE(PLUGIN_SUPPORT, 1,
- [Define if all infrastructure, needed for plugins, is supported.])
-fi
+m4_include([plugin/configfrag.ac])
# Check for functions needed.
AC_CHECK_FUNCS(getloadavg clock_gettime strtoull)
@@ -283,40 +278,6 @@ else
multilib_arg=
fi
-# Get accel target and path to install tree of accel compiler
-offload_additional_options=
-offload_additional_lib_paths=
-offload_targets=
-if test x"$enable_offload_targets" != x; then
- for tgt in `echo $enable_offload_targets | sed -e 's#,# #g'`; do
- tgt_dir=`echo $tgt | grep '=' | sed 's/.*=//'`
- tgt=`echo $tgt | sed 's/=.*//'`
- case $tgt in
- *-intelmic-* | *-intelmicemul-*)
- tgt_name="intelmic" ;;
- *)
- AC_MSG_ERROR([unknown offload target specified]) ;;
- esac
- if test x"$offload_targets" = x; then
- offload_targets=$tgt_name
- else
- offload_targets=$offload_targets,$tgt_name
- fi
- if test x"$tgt_dir" != x; then
- offload_additional_options="$offload_additional_options -B$tgt_dir/libexec/gcc/\$(target_alias)/\$(gcc_version) -B$tgt_dir/bin"
- offload_additional_lib_paths="$offload_additional_lib_paths:$tgt_dir/lib64:$tgt_dir/lib:$tgt_dir/lib32"
- else
- offload_additional_options="$offload_additional_options -B\$(libexecdir)/gcc/\$(target_alias)/\$(gcc_version) -B\$(bindir)"
- offload_additional_lib_paths="$offload_additional_lib_paths:$toolexeclibdir"
- fi
- done
-fi
-AC_DEFINE_UNQUOTED(OFFLOAD_TARGETS, "$offload_targets",
- [Define to hold the list of target names suitable for offloading.])
-AC_SUBST(offload_targets)
-AC_SUBST(offload_additional_options)
-AC_SUBST(offload_additional_lib_paths)
-
# Set up the set of libraries that we need to link against for libgomp.
# Note that the GOMP_SELF_SPEC in gcc.c may force -pthread,
# which will force linkage against -lpthread (or equivalent for the system).
@@ -391,4 +352,5 @@ CFLAGS="$save_CFLAGS"
AC_CONFIG_FILES(omp.h omp_lib.h omp_lib.f90 libgomp_f.h)
AC_CONFIG_FILES(Makefile testsuite/Makefile libgomp.spec)
+AC_CONFIG_FILES([testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in])
AC_OUTPUT
diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt
index ebd9be9..2ef4926 100644
--- a/libgomp/configure.tgt
+++ b/libgomp/configure.tgt
@@ -27,7 +27,7 @@ fi
config_path="posix"
# Check for futex enabled all at once.
-if test $enable_linux_futex = yes; then
+if test x$enable_linux_futex = xyes; then
case "${target}" in
aarch64*-*-linux*)
diff --git a/libgomp/env.c b/libgomp/env.c
index b05b73a..6b5e963 100644
--- a/libgomp/env.c
+++ b/libgomp/env.c
@@ -28,6 +28,7 @@
#include "libgomp.h"
#include "libgomp_f.h"
+#include "oacc-int.h"
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
@@ -77,6 +78,9 @@ char *gomp_bind_var_list;
unsigned long gomp_bind_var_list_len;
void **gomp_places_list;
unsigned long gomp_places_list_len;
+int gomp_debug_var;
+char *goacc_device_type;
+int goacc_device_num;
/* Parse the OMP_SCHEDULE environment variable. */
@@ -1012,6 +1016,16 @@ parse_affinity (bool ignore)
return false;
}
+static void
+parse_acc_device_type (void)
+{
+ const char *env = getenv ("ACC_DEVICE_TYPE");
+
+ if (env && *env != '\0')
+ goacc_device_type = strdup (env);
+ else
+ goacc_device_type = NULL;
+}
static void
handle_omp_display_env (unsigned long stacksize, int wait_policy)
@@ -1182,6 +1196,7 @@ initialize_env (void)
gomp_global_icv.thread_limit_var
= thread_limit_var > INT_MAX ? UINT_MAX : thread_limit_var;
}
+ parse_int ("GOMP_DEBUG", &gomp_debug_var, true);
#ifndef HAVE_SYNC_BUILTINS
gomp_mutex_init (&gomp_managed_threads_lock);
#endif
@@ -1272,6 +1287,15 @@ initialize_env (void)
}
handle_omp_display_env (stacksize, wait_policy);
+
+ /* OpenACC. */
+
+ if (!parse_int ("ACC_DEVICE_NUM", &goacc_device_num, true))
+ goacc_device_num = 0;
+
+ parse_acc_device_type ();
+
+ goacc_runtime_initialize ();
}
diff --git a/libgomp/error.c b/libgomp/error.c
index e61d82f..094c24a 100644
--- a/libgomp/error.c
+++ b/libgomp/error.c
@@ -36,7 +36,26 @@
#include <stdlib.h>
-static void
+#undef gomp_vdebug
+void
+gomp_vdebug (int kind __attribute__ ((unused)), const char *msg, va_list list)
+{
+ if (gomp_debug_var)
+ vfprintf (stderr, msg, list);
+}
+
+#undef gomp_debug
+void
+gomp_debug (int kind, const char *msg, ...)
+{
+ va_list list;
+
+ va_start (list, msg);
+ gomp_vdebug (kind, msg, list);
+ va_end (list);
+}
+
+void
gomp_verror (const char *fmt, va_list list)
{
fputs ("\nlibgomp: ", stderr);
@@ -55,13 +74,18 @@ gomp_error (const char *fmt, ...)
}
void
+gomp_vfatal (const char *fmt, va_list list)
+{
+ gomp_verror (fmt, list);
+ exit (EXIT_FAILURE);
+}
+
+void
gomp_fatal (const char *fmt, ...)
{
va_list list;
va_start (list, fmt);
- gomp_verror (fmt, list);
+ gomp_vfatal (fmt, list);
va_end (list);
-
- exit (EXIT_FAILURE);
}
diff --git a/libgomp/libgomp-plugin.c b/libgomp/libgomp-plugin.c
new file mode 100644
index 0000000..ffb22e9
--- /dev/null
+++ b/libgomp/libgomp-plugin.c
@@ -0,0 +1,80 @@
+/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Exported (non-hidden) functions exposing libgomp interface for plugins. */
+
+#include <stdlib.h>
+
+#include "libgomp.h"
+#include "libgomp-plugin.h"
+
+void *
+GOMP_PLUGIN_malloc (size_t size)
+{
+ return gomp_malloc (size);
+}
+
+void *
+GOMP_PLUGIN_malloc_cleared (size_t size)
+{
+ return gomp_malloc_cleared (size);
+}
+
+void *
+GOMP_PLUGIN_realloc (void *ptr, size_t size)
+{
+ return gomp_realloc (ptr, size);
+}
+
+void
+GOMP_PLUGIN_debug (int kind, const char *msg, ...)
+{
+ va_list ap;
+ /* Forward the va_list to gomp_vdebug; gomp_debug expects "..." arguments. */
+ va_start (ap, msg);
+ gomp_vdebug (kind, msg, ap);
+ va_end (ap);
+}
+
+void
+GOMP_PLUGIN_error (const char *msg, ...)
+{
+ va_list ap;
+
+ va_start (ap, msg);
+ gomp_verror (msg, ap);
+ va_end (ap);
+}
+
+void
+GOMP_PLUGIN_fatal (const char *msg, ...)
+{
+ va_list ap;
+
+ va_start (ap, msg);
+ gomp_vfatal (msg, ap);
+ va_end (ap);
+}
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
new file mode 100644
index 0000000..d9cbff5
--- /dev/null
+++ b/libgomp/libgomp-plugin.h
@@ -0,0 +1,80 @@
+/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* An interface to various libgomp-internal functions for use by plugins. */
+
+#ifndef LIBGOMP_PLUGIN_H
+#define LIBGOMP_PLUGIN_H 1
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Capabilities of offloading devices. */
+#define GOMP_OFFLOAD_CAP_SHARED_MEM (1 << 0)
+#define GOMP_OFFLOAD_CAP_NATIVE_EXEC (1 << 1)
+#define GOMP_OFFLOAD_CAP_OPENMP_400 (1 << 2)
+#define GOMP_OFFLOAD_CAP_OPENACC_200 (1 << 3)
+
+/* Type of offload target device. Keep in sync with include/gomp-constants.h. */
+enum offload_target_type
+{
+ OFFLOAD_TARGET_TYPE_HOST = 2,
+ OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3,
+ OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5,
+ OFFLOAD_TARGET_TYPE_INTEL_MIC = 6
+};
+
+/* Auxiliary struct, used for transferring a host-target address range mapping
+ from plugin to libgomp. */
+struct mapping_table
+{
+ uintptr_t host_start;
+ uintptr_t host_end;
+ uintptr_t tgt_start;
+ uintptr_t tgt_end;
+};
+
+/* Miscellaneous functions. */
+extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc));
+extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__ ((malloc));
+extern void *GOMP_PLUGIN_realloc (void *, size_t);
+
+extern void GOMP_PLUGIN_debug (int, const char *, ...)
+ __attribute__ ((format (printf, 2, 3)));
+extern void GOMP_PLUGIN_error (const char *, ...)
+ __attribute__ ((format (printf, 1, 2)));
+extern void GOMP_PLUGIN_fatal (const char *, ...)
+ __attribute__ ((noreturn, format (printf, 1, 2)));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 05f3496..3089401 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -24,9 +24,10 @@
<http://www.gnu.org/licenses/>. */
/* This file contains data types and function declarations that are not
- part of the official OpenMP user interface. There are declarations
- in here that are part of the GNU OpenMP ABI, in that the compiler is
- required to know about them and use them.
+ part of the official OpenACC or OpenMP user interfaces. There are
+ declarations in here that are part of the GNU Offloading and Multi
+ Processing ABI, in that the compiler is required to know about them
+ and use them.
The convention is that the all caps prefix "GOMP" is used group items
that are part of the external ABI, and the lower case prefix "gomp"
@@ -37,10 +38,12 @@
#include "config.h"
#include "gstdint.h"
+#include "libgomp-plugin.h"
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>
+#include <stdarg.h>
#ifdef HAVE_ATTRIBUTE_VISIBILITY
# pragma GCC visibility push(hidden)
@@ -221,6 +224,7 @@ struct gomp_team_state
};
struct target_mem_desc;
+struct gomp_memory_mapping;
/* These are the OpenMP 4.0 Internal Control Variables described in
section 2.3.1. Those described as having one copy per task are
@@ -254,6 +258,9 @@ extern char *gomp_bind_var_list;
extern unsigned long gomp_bind_var_list_len;
extern void **gomp_places_list;
extern unsigned long gomp_places_list_len;
+extern int gomp_debug_var;
+extern int goacc_device_num;
+extern char *goacc_device_type;
enum gomp_task_kind
{
@@ -533,10 +540,26 @@ extern void *gomp_realloc (void *, size_t);
/* error.c */
+extern void gomp_vdebug (int, const char *, va_list);
+extern void gomp_debug (int, const char *, ...)
+ __attribute__ ((format (printf, 2, 3)));
+#define gomp_vdebug(KIND, FMT, VALIST) \
+ do { \
+ if (__builtin_expect (gomp_debug_var, 0)) \
+ (gomp_vdebug) ((KIND), (FMT), (VALIST)); \
+ } while (0)
+#define gomp_debug(KIND, ...) \
+ do { \
+ if (__builtin_expect (gomp_debug_var, 0)) \
+ (gomp_debug) ((KIND), __VA_ARGS__); \
+ } while (0)
+extern void gomp_verror (const char *, va_list);
extern void gomp_error (const char *, ...)
- __attribute__((format (printf, 1, 2)));
+ __attribute__ ((format (printf, 1, 2)));
+extern void gomp_vfatal (const char *, va_list)
+ __attribute__ ((noreturn));
extern void gomp_fatal (const char *, ...)
- __attribute__((noreturn, format (printf, 1, 2)));
+ __attribute__ ((noreturn, format (printf, 1, 2)));
/* iter.c */
@@ -607,8 +630,192 @@ extern void gomp_free_thread (void *);
/* target.c */
+extern void gomp_init_targets_once (void);
extern int gomp_get_num_devices (void);
+typedef struct splay_tree_node_s *splay_tree_node;
+typedef struct splay_tree_s *splay_tree;
+typedef struct splay_tree_key_s *splay_tree_key;
+
+/* Descriptor for one region of target memory, together with the splay tree
+ keys that have to be released when the region ends. Descriptors are
+ chained through PREV. */
+struct target_mem_desc {
+ /* Reference count. */
+ uintptr_t refcount;
+ /* All the splay nodes allocated together. */
+ splay_tree_node array;
+ /* Start of the target region. */
+ uintptr_t tgt_start;
+ /* End of the target region. */
+ uintptr_t tgt_end;
+ /* Handle to free. */
+ void *to_free;
+ /* Previous target_mem_desc. */
+ struct target_mem_desc *prev;
+ /* Number of items in following list. */
+ size_t list_count;
+
+ /* Corresponding target device descriptor. */
+ struct gomp_device_descr *device_descr;
+
+ /* Memory mapping info for the thread that created this descriptor. */
+ struct gomp_memory_mapping *mem_map;
+
+ /* List of splay keys to remove (or decrease refcount)
+ at the end of region. This is a flexible array member; LIST_COUNT
+ above gives its length. */
+ splay_tree_key list[];
+};
+
+/* Key type of the splay tree of mapped memory regions. Each key describes
+ the half-open host address range [HOST_START, HOST_END) and where the
+ corresponding object lives inside the target memory block TGT. */
+struct splay_tree_key_s {
+ /* Address of the host object. */
+ uintptr_t host_start;
+ /* Address immediately after the host object. */
+ uintptr_t host_end;
+ /* Descriptor of the target memory. */
+ struct target_mem_desc *tgt;
+ /* Offset from tgt->tgt_start to the start of the target object. */
+ uintptr_t tgt_offset;
+ /* Reference count. */
+ uintptr_t refcount;
+ /* Asynchronous reference count. */
+ uintptr_t async_refcount;
+ /* True if data should be copied from device to host at the end. */
+ bool copy_from;
+};
+
+#include "splay-tree.h"
+
+/* Information about mapped memory regions (per device/context). An
+ instance of this structure is embedded in struct gomp_device_descr as its
+ mem_map member, and struct target_mem_desc points back to the mapping it
+ belongs to. */
+
+struct gomp_memory_mapping
+{
+ /* Mutex for operating with the splay tree and other shared structures. */
+ gomp_mutex_t lock;
+
+ /* True when tables have been added to this memory map. */
+ bool is_initialized;
+
+ /* Splay tree containing information about mapped memory regions. */
+ struct splay_tree_s splay_tree;
+};
+
+/* OpenACC-specific part of a device descriptor: two pieces of mutable state
+ (DATA_ENVIRON and TARGET_DATA) followed by the function hooks that the
+ OpenACC runtime calls into. It is embedded in struct gomp_device_descr
+ as the member named "openacc". */
+typedef struct acc_dispatch_t
+{
+ /* This is a linked list of data mapped using the
+ acc_map_data/acc_unmap_data or "acc enter data"/"acc exit data" pragmas.
+ Unlike mapped_data in the goacc_thread struct, unmapping can
+ happen out-of-order with respect to mapping. */
+ /* This is guarded by the lock in the "outer" struct gomp_device_descr. */
+ struct target_mem_desc *data_environ;
+
+ /* Extra information required for a device instance by a given target. */
+ /* This is guarded by the lock in the "outer" struct gomp_device_descr. */
+ void *target_data;
+
+ /* Open or close a device instance. */
+ void *(*open_device_func) (int n);
+ int (*close_device_func) (void *h);
+
+ /* Set or get the device number. */
+ int (*get_device_num_func) (void);
+ void (*set_device_num_func) (int);
+
+ /* Execute. */
+ void (*exec_func) (void (*) (void *), size_t, void **, void **, size_t *,
+ unsigned short *, int, int, int, int, void *);
+
+ /* Async cleanup callback registration. */
+ void (*register_async_cleanup_func) (void *);
+
+ /* Asynchronous routines. */
+ int (*async_test_func) (int);
+ int (*async_test_all_func) (void);
+ void (*async_wait_func) (int);
+ void (*async_wait_async_func) (int, int);
+ void (*async_wait_all_func) (void);
+ void (*async_wait_all_async_func) (int);
+ void (*async_set_async_func) (int);
+
+ /* Create/destroy TLS data. */
+ void *(*create_thread_data_func) (void *);
+ void (*destroy_thread_data_func) (void *);
+
+ /* NVIDIA target specific routines. These hooks may be left NULL by a
+ device that does not provide them. */
+ struct {
+ void *(*get_current_device_func) (void);
+ void *(*get_current_context_func) (void);
+ void *(*get_stream_func) (int);
+ int (*set_stream_func) (int, void *);
+ } cuda;
+} acc_dispatch_t;
+
+/* This structure describes accelerator device.
+ It contains name of the corresponding libgomp plugin, function handlers for
+ interaction with the device, ID-number of the device, and information about
+ mapped memory.
+
+ The members fall into two groups: immutable data that is only written
+ during initialization, and mutable data guarded by LOCK (MEM_MAP carries
+ a lock of its own). */
+struct gomp_device_descr
+{
+ /* Immutable data, which is only set during initialization, and which is not
+ guarded by the lock. */
+
+ /* The name of the device. */
+ const char *name;
+
+ /* Capabilities of device (supports OpenACC, OpenMP). */
+ unsigned int capabilities;
+
+ /* This is the ID number of device among devices of the same type. */
+ int target_id;
+
+ /* This is the TYPE of device. */
+ enum offload_target_type type;
+
+ /* Function handlers. The hooks that operate on one particular device
+ take an int as their first argument. */
+ const char *(*get_name_func) (void);
+ unsigned int (*get_caps_func) (void);
+ int (*get_type_func) (void);
+ int (*get_num_devices_func) (void);
+ void (*register_image_func) (void *, void *);
+ void (*init_device_func) (int);
+ void (*fini_device_func) (int);
+ int (*get_table_func) (int, struct mapping_table **);
+ void *(*alloc_func) (int, size_t);
+ void (*free_func) (int, void *);
+ void *(*dev2host_func) (int, void *, const void *, size_t);
+ void *(*host2dev_func) (int, void *, const void *, size_t);
+ void (*run_func) (int, void *, void *);
+
+ /* Memory-mapping info for this device instance. */
+ /* Uses a separate lock. */
+ struct gomp_memory_mapping mem_map;
+
+ /* Mutex for the mutable data. */
+ gomp_mutex_t lock;
+
+ /* Set to true when device is initialized. */
+ bool is_initialized;
+
+ /* True when offload regions have been registered with this device. */
+ bool offload_regions_registered;
+
+ /* OpenACC-specific data and functions. */
+ /* This is mutable because of its mutable data_environ and target_data
+ members. */
+ acc_dispatch_t openacc;
+};
+
+extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *);
+extern void gomp_acc_remove_pointer (void *, bool, int, int);
+
+extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
+ size_t, void **, void **,
+ size_t *, void *, bool, bool);
+extern void gomp_copy_from_async (struct target_mem_desc *);
+extern void gomp_unmap_vars (struct target_mem_desc *, bool);
+extern void gomp_init_device (struct gomp_device_descr *);
+extern void gomp_init_tables (struct gomp_device_descr *,
+ struct gomp_memory_mapping *);
+extern void gomp_free_memmap (struct gomp_memory_mapping *);
+extern void gomp_fini_device (struct gomp_device_descr *);
+
/* work.c */
extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned);
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index f36df23..f44174e 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -232,3 +232,107 @@ GOMP_4.0.1 {
global:
GOMP_offload_register;
} GOMP_4.0;
+
+OACC_2.0 {
+ global:
+ acc_get_num_devices;
+ acc_get_num_devices_h_;
+ acc_set_device_type;
+ acc_set_device_type_h_;
+ acc_get_device_type;
+ acc_get_device_type_h_;
+ acc_set_device_num;
+ acc_set_device_num_h_;
+ acc_get_device_num;
+ acc_get_device_num_h_;
+ acc_async_test;
+ acc_async_test_h_;
+ acc_async_test_all;
+ acc_async_test_all_h_;
+ acc_wait;
+ acc_wait_h_;
+ acc_wait_async;
+ acc_wait_async_h_;
+ acc_wait_all;
+ acc_wait_all_h_;
+ acc_wait_all_async;
+ acc_wait_all_async_h_;
+ acc_init;
+ acc_init_h_;
+ acc_shutdown;
+ acc_shutdown_h_;
+ acc_on_device;
+ acc_on_device_h_;
+ acc_malloc;
+ acc_free;
+ acc_copyin;
+ acc_copyin_32_h_;
+ acc_copyin_64_h_;
+ acc_copyin_array_h_;
+ acc_present_or_copyin;
+ acc_present_or_copyin_32_h_;
+ acc_present_or_copyin_64_h_;
+ acc_present_or_copyin_array_h_;
+ acc_create;
+ acc_create_32_h_;
+ acc_create_64_h_;
+ acc_create_array_h_;
+ acc_present_or_create;
+ acc_present_or_create_32_h_;
+ acc_present_or_create_64_h_;
+ acc_present_or_create_array_h_;
+ acc_copyout;
+ acc_copyout_32_h_;
+ acc_copyout_64_h_;
+ acc_copyout_array_h_;
+ acc_delete;
+ acc_delete_32_h_;
+ acc_delete_64_h_;
+ acc_delete_array_h_;
+ acc_update_device;
+ acc_update_device_32_h_;
+ acc_update_device_64_h_;
+ acc_update_device_array_h_;
+ acc_update_self;
+ acc_update_self_32_h_;
+ acc_update_self_64_h_;
+ acc_update_self_array_h_;
+ acc_map_data;
+ acc_unmap_data;
+ acc_deviceptr;
+ acc_hostptr;
+ acc_is_present;
+ acc_is_present_32_h_;
+ acc_is_present_64_h_;
+ acc_is_present_array_h_;
+ acc_memcpy_to_device;
+ acc_memcpy_from_device;
+ acc_get_current_cuda_device;
+ acc_get_current_cuda_context;
+ acc_get_cuda_stream;
+ acc_set_cuda_stream;
+};
+
+GOACC_2.0 {
+ global:
+ GOACC_data_end;
+ GOACC_data_start;
+ GOACC_enter_exit_data;
+ GOACC_parallel;
+ GOACC_update;
+ GOACC_wait;
+ GOACC_get_thread_num;
+ GOACC_get_num_threads;
+};
+
+GOMP_PLUGIN_1.0 {
+ global:
+ GOMP_PLUGIN_malloc;
+ GOMP_PLUGIN_malloc_cleared;
+ GOMP_PLUGIN_realloc;
+ GOMP_PLUGIN_debug;
+ GOMP_PLUGIN_error;
+ GOMP_PLUGIN_fatal;
+ GOMP_PLUGIN_async_unmap_vars;
+ GOMP_PLUGIN_acc_thread;
+};
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index b7306f1..6c7f1ae 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -35,8 +35,9 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
@end direntry
This manual documents libgomp, the GNU Offloading and Multi Processing
-Runtime library. This is the GNU implementation of the OpenMP API for
-multi-platform shared-memory parallel programming in C/C++ and Fortran.
+Runtime library. This is the GNU implementation of the OpenMP and
+OpenACC APIs for parallel and accelerator programming in C/C++ and
+Fortran.
Published by the Free Software Foundation
51 Franklin Street, Fifth Floor
@@ -50,7 +51,7 @@ Boston, MA 02110-1301 USA
@titlepage
@title GNU Offloading and Multi Processing Runtime Library
-@subtitle The GNU OpenMP Implementation
+@subtitle The GNU OpenMP and OpenACC Implementation
@page
@vskip 0pt plus 1filll
@comment For the @value{version-GCC} Version*
@@ -72,11 +73,18 @@ Boston, MA 02110-1301, USA@*
@cindex Introduction
This manual documents the usage of libgomp, the GNU Offloading and
-Multi Processing Runtime Library. This is the GNU implementation of the
-@uref{http://www.openmp.org, OpenMP} Application Programming Interface (API)
-for multi-platform shared-memory parallel programming in C/C++ and Fortran.
+Multi Processing Runtime Library. This includes the GNU
+implementation of the @uref{http://www.openmp.org, OpenMP} Application
+Programming Interface (API) for multi-platform shared-memory parallel
+programming in C/C++ and Fortran, and the GNU implementation of the
+@uref{http://www.openacc.org/, OpenACC} Application Programming
+Interface (API) for offloading of code to accelerator devices in C/C++
+and Fortran.
-Originally, libgomp was known as the GNU OpenMP Runtime Library.
+Originally, libgomp implemented the GNU OpenMP Runtime Library. Based
+on this, support for OpenACC and offloading (both OpenACC and OpenMP
+4's target construct) was added later on, and the library's name was
+changed to GNU Offloading and Multi Processing Runtime Library.
@@ -1312,6 +1320,7 @@ beginning with @env{GOMP_} are GNU extensions.
* OMP_THREAD_LIMIT:: Set the maximum number of threads
* OMP_WAIT_POLICY:: How waiting threads are handled
* GOMP_CPU_AFFINITY:: Bind threads to specific CPUs
+* GOMP_DEBUG:: Enable debugging output
* GOMP_STACKSIZE:: Set default thread stack size
* GOMP_SPINCOUNT:: Set the busy-wait spin count
@end menu
@@ -1631,6 +1640,20 @@ If both @env{GOMP_CPU_AFFINITY} and @env{OMP_PROC_BIND} are set,
+@node GOMP_DEBUG
+@section @env{GOMP_DEBUG} -- Enable debugging output
+@cindex Environment Variable
+@table @asis
+@item @emph{Description}:
+Enable debugging output. The variable should be set to @code{0}
+(disabled, also the default if not set), or @code{1} (enabled).
+
+If enabled, some debugging output will be printed during execution.
+This is currently not specified in more detail, and subject to change.
+@end table
+
+
+
@node GOMP_STACKSIZE
@section @env{GOMP_STACKSIZE} -- Set default thread stack size
@cindex Environment Variable
@@ -2080,7 +2103,8 @@ becomes
Bugs in the GNU Offloading and Multi Processing Runtime Library should
be reported via @uref{http://gcc.gnu.org/bugzilla/, Bugzilla}. Please add
-"openmp" to the keywords field in the bug report.
+"openacc", or "openmp", or both to the keywords field in the bug
+report, as appropriate.
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h
index 56a4a97..c1e4e63 100644
--- a/libgomp/libgomp_g.h
+++ b/libgomp/libgomp_g.h
@@ -215,4 +215,20 @@ extern void GOMP_target_update (int, const void *,
size_t, void **, size_t *, unsigned char *);
extern void GOMP_teams (unsigned int, unsigned int);
+/* oacc-parallel.c */
+
+extern void GOACC_data_start (int, const void *,
+ size_t, void **, size_t *, unsigned short *);
+extern void GOACC_data_end (void);
+extern void GOACC_enter_exit_data (int, const void *, size_t, void **,
+ size_t *, unsigned short *, int, int, ...);
+extern void GOACC_parallel (int, void (*) (void *), const void *, size_t,
+ void **, size_t *, unsigned short *, int, int, int,
+ int, int, ...);
+extern void GOACC_update (int, const void *, size_t, void **, size_t *,
+ unsigned short *, int, int, ...);
+extern void GOACC_wait (int, int, ...);
+extern int GOACC_get_num_threads (void);
+extern int GOACC_get_thread_num (void);
+
#endif /* LIBGOMP_G_H */
diff --git a/libgomp/oacc-async.c b/libgomp/oacc-async.c
new file mode 100644
index 0000000..08b7c5e
--- /dev/null
+++ b/libgomp/oacc-async.c
@@ -0,0 +1,77 @@
+/* OpenACC Runtime Library Definitions.
+
+ Copyright (C) 2013-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+#include "openacc.h"
+#include "libgomp.h"
+#include "oacc-int.h"
+
+/* Forward an acc_async_test request for ASYNC to the async_test_func hook
+   of the current base device and return the hook's result.  Aborts via
+   gomp_fatal when ASYNC is below acc_async_sync or when no device has been
+   initialized.  */
+int
+acc_async_test (int async)
+{
+  if (async < acc_async_sync)
+    gomp_fatal ("invalid async argument: %d", async);
+
+  /* base_dev is NULL until a device has been initialized (and again after
+     shutdown); report that instead of dereferencing a null pointer.  */
+  if (!base_dev)
+    gomp_fatal ("no device initialized");
+
+  return base_dev->openacc.async_test_func (async);
+}
+
+/* Forward to the async_test_all_func hook of the current base device and
+   return the hook's result.  Aborts via gomp_fatal when no device has been
+   initialized.  */
+int
+acc_async_test_all (void)
+{
+  /* base_dev is NULL until a device has been initialized (and again after
+     shutdown); report that instead of dereferencing a null pointer.  */
+  if (!base_dev)
+    gomp_fatal ("no device initialized");
+
+  return base_dev->openacc.async_test_all_func ();
+}
+
+/* Forward an acc_wait request for ASYNC to the async_wait_func hook of the
+   current base device.  Aborts via gomp_fatal when ASYNC is below
+   acc_async_sync or when no device has been initialized.  */
+void
+acc_wait (int async)
+{
+  if (async < acc_async_sync)
+    gomp_fatal ("invalid async argument: %d", async);
+
+  /* base_dev is NULL until a device has been initialized (and again after
+     shutdown); report that instead of dereferencing a null pointer.  */
+  if (!base_dev)
+    gomp_fatal ("no device initialized");
+
+  base_dev->openacc.async_wait_func (async);
+}
+
+/* Forward ASYNC1 and ASYNC2, unchanged and unvalidated, to the
+   async_wait_async_func hook of the current base device.  Aborts via
+   gomp_fatal when no device has been initialized.  */
+void
+acc_wait_async (int async1, int async2)
+{
+  /* base_dev is NULL until a device has been initialized (and again after
+     shutdown); report that instead of dereferencing a null pointer.  */
+  if (!base_dev)
+    gomp_fatal ("no device initialized");
+
+  base_dev->openacc.async_wait_async_func (async1, async2);
+}
+
+/* Forward to the async_wait_all_func hook of the current base device.
+   Aborts via gomp_fatal when no device has been initialized.  */
+void
+acc_wait_all (void)
+{
+  /* base_dev is NULL until a device has been initialized (and again after
+     shutdown); report that instead of dereferencing a null pointer.  */
+  if (!base_dev)
+    gomp_fatal ("no device initialized");
+
+  base_dev->openacc.async_wait_all_func ();
+}
+
+/* Forward an acc_wait_all_async request for ASYNC to the
+   async_wait_all_async_func hook of the current base device.  Aborts via
+   gomp_fatal when ASYNC is below acc_async_sync or when no device has been
+   initialized.  */
+void
+acc_wait_all_async (int async)
+{
+  if (async < acc_async_sync)
+    gomp_fatal ("invalid async argument: %d", async);
+
+  /* base_dev is NULL until a device has been initialized (and again after
+     shutdown); report that instead of dereferencing a null pointer.  */
+  if (!base_dev)
+    gomp_fatal ("no device initialized");
+
+  base_dev->openacc.async_wait_all_async_func (async);
+}
diff --git a/libgomp/oacc-cuda.c b/libgomp/oacc-cuda.c
new file mode 100644
index 0000000..c8ef376
--- /dev/null
+++ b/libgomp/oacc-cuda.c
@@ -0,0 +1,84 @@
+/* OpenACC Runtime Library: CUDA support glue.
+
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "openacc.h"
+#include "config.h"
+#include "libgomp.h"
+#include "oacc-int.h"
+
+/* Return whatever the base device's cuda.get_current_device_func hook
+   yields, or NULL when there is no base device or the device does not
+   provide that hook.  */
+void *
+acc_get_current_cuda_device (void)
+{
+  if (base_dev == NULL
+      || base_dev->openacc.cuda.get_current_device_func == NULL)
+    return NULL;
+
+  return base_dev->openacc.cuda.get_current_device_func ();
+}
+
+/* Return whatever the base device's cuda.get_current_context_func hook
+   yields, or NULL when there is no base device or the device does not
+   provide that hook.  */
+void *
+acc_get_current_cuda_context (void)
+{
+  if (base_dev == NULL
+      || base_dev->openacc.cuda.get_current_context_func == NULL)
+    return NULL;
+
+  return base_dev->openacc.cuda.get_current_context_func ();
+}
+
+/* Ask the base device's cuda.get_stream_func hook for the stream that
+   belongs to ASYNC.  The result is NULL when ASYNC is negative, when there
+   is no base device, or when the device does not provide the hook.  */
+void *
+acc_get_cuda_stream (int async)
+{
+  if (async >= 0
+      && base_dev != NULL
+      && base_dev->openacc.cuda.get_stream_func != NULL)
+    return base_dev->openacc.cuda.get_stream_func (async);
+
+  return NULL;
+}
+
+/* Hand STREAM for ASYNC to the base device's cuda.set_stream_func hook and
+   return the hook's result.  Returns 0 without doing anything when ASYNC
+   is negative or STREAM is NULL, and -1 when, after lazy initialization,
+   there is no base device or the device does not provide the hook.  */
+int
+acc_set_cuda_stream (int async, void *stream)
+{
+  /* Invalid arguments are rejected before the runtime is touched.  */
+  if (async < 0 || !stream)
+    return 0;
+
+  goacc_lazy_initialize ();
+
+  if (!base_dev || !base_dev->openacc.cuda.set_stream_func)
+    return -1;
+
+  return base_dev->openacc.cuda.set_stream_func (async, stream);
+}
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
new file mode 100644
index 0000000..6aeb1e7
--- /dev/null
+++ b/libgomp/oacc-host.c
@@ -0,0 +1,100 @@
+/* OpenACC Runtime Library: acc_device_host.
+
+ Copyright (C) 2013-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This shares much of the implementation of the plugin-host.c "host_nonshm"
+ plugin. */
+#include "plugin/plugin-host.c"
+
+/* Device descriptor for the host device. All hooks are the
+ GOMP_OFFLOAD_* functions made visible by including plugin/plugin-host.c
+ above. The two mutexes are not initialized here; goacc_host_init takes
+ care of that. */
+static struct gomp_device_descr host_dispatch =
+ {
+ .name = "host",
+ .capabilities = (GOMP_OFFLOAD_CAP_OPENACC_200
+ | GOMP_OFFLOAD_CAP_NATIVE_EXEC
+ | GOMP_OFFLOAD_CAP_SHARED_MEM),
+ .target_id = 0,
+ .type = OFFLOAD_TARGET_TYPE_HOST,
+
+ .get_name_func = GOMP_OFFLOAD_get_name,
+ .get_caps_func = GOMP_OFFLOAD_get_caps,
+ .get_type_func = GOMP_OFFLOAD_get_type,
+ .get_num_devices_func = GOMP_OFFLOAD_get_num_devices,
+ .register_image_func = GOMP_OFFLOAD_register_image,
+ .init_device_func = GOMP_OFFLOAD_init_device,
+ .fini_device_func = GOMP_OFFLOAD_fini_device,
+ .get_table_func = GOMP_OFFLOAD_get_table,
+ .alloc_func = GOMP_OFFLOAD_alloc,
+ .free_func = GOMP_OFFLOAD_free,
+ .dev2host_func = GOMP_OFFLOAD_dev2host,
+ .host2dev_func = GOMP_OFFLOAD_host2dev,
+ .run_func = GOMP_OFFLOAD_run,
+
+ .mem_map.is_initialized = false,
+ .mem_map.splay_tree.root = NULL,
+ .is_initialized = false,
+ .offload_regions_registered = false,
+
+ .openacc = {
+ .open_device_func = GOMP_OFFLOAD_openacc_open_device,
+ .close_device_func = GOMP_OFFLOAD_openacc_close_device,
+
+ .get_device_num_func = GOMP_OFFLOAD_openacc_get_device_num,
+ .set_device_num_func = GOMP_OFFLOAD_openacc_set_device_num,
+
+ .exec_func = GOMP_OFFLOAD_openacc_parallel,
+
+ .register_async_cleanup_func
+ = GOMP_OFFLOAD_openacc_register_async_cleanup,
+
+ .async_set_async_func = GOMP_OFFLOAD_openacc_async_set_async,
+ .async_test_func = GOMP_OFFLOAD_openacc_async_test,
+ .async_test_all_func = GOMP_OFFLOAD_openacc_async_test_all,
+ .async_wait_func = GOMP_OFFLOAD_openacc_async_wait,
+ .async_wait_async_func = GOMP_OFFLOAD_openacc_async_wait_async,
+ .async_wait_all_func = GOMP_OFFLOAD_openacc_async_wait_all,
+ .async_wait_all_async_func = GOMP_OFFLOAD_openacc_async_wait_all_async,
+
+ .create_thread_data_func = GOMP_OFFLOAD_openacc_create_thread_data,
+ .destroy_thread_data_func = GOMP_OFFLOAD_openacc_destroy_thread_data,
+
+ /* The host provides none of the CUDA hooks. */
+ .cuda = {
+ .get_current_device_func = NULL,
+ .get_current_context_func = NULL,
+ .get_stream_func = NULL,
+ .set_stream_func = NULL,
+ }
+ }
+ };
+
+/* Register this device type. The constructor attribute makes this run
+ automatically at load time: it initializes the two mutexes inside
+ host_dispatch and then passes the descriptor to goacc_register. */
+static __attribute__ ((constructor))
+void goacc_host_init (void)
+{
+ gomp_mutex_init (&host_dispatch.mem_map.lock);
+ gomp_mutex_init (&host_dispatch.lock);
+ goacc_register (&host_dispatch);
+}
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
new file mode 100644
index 0000000..166eb55
--- /dev/null
+++ b/libgomp/oacc-init.c
@@ -0,0 +1,636 @@
+/* OpenACC Runtime initialization routines
+
+ Copyright (C) 2013-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "libgomp.h"
+#include "oacc-int.h"
+#include "openacc.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <stdbool.h>
+#include <string.h>
+
+/* Held while the dispatchers table is updated (goacc_register) and while
+ the base device is initialized, opened or shut down (acc_init,
+ acc_shutdown, lazy_init_and_open). */
+static gomp_mutex_t acc_device_lock;
+
+/* The dispatch table for the current accelerator device. This is global, so
+ you can only have one type of device open at any given time in a program.
+ This is the "base" device in that several devices that use the same
+ dispatch table may be active concurrently: this one (the "zeroth") is used
+ for overall initialisation/shutdown, and other instances -- not necessarily
+ including this one -- may be opened and closed once the base device has
+ been initialized. */
+struct gomp_device_descr *base_dev;
+
+/* Per-thread pointer to the calling thread's struct goacc_thread, kept
+ either in a __thread variable or under a pthread key depending on the
+ configuration. goacc_new_thread stores the pointer here. */
+#if defined HAVE_TLS || defined USE_EMUTLS
+__thread struct goacc_thread *goacc_tls_data;
+#else
+pthread_key_t goacc_tls_key;
+#endif
+/* goacc_new_thread also stores the thread's struct goacc_thread under this
+ key. NOTE(review): presumably the key's destructor is
+ goacc_destroy_thread; the key creation is not in this part of the
+ file -- confirm. */
+static pthread_key_t goacc_cleanup_key;
+
+/* Current dispatcher, and how it was initialized */
+static acc_device_t init_key = _ACC_device_hwm;
+
+/* Head of the singly linked list (through the NEXT member) of all
+ struct goacc_thread objects, and the mutex that protects the list. */
+static struct goacc_thread *goacc_threads;
+static gomp_mutex_t goacc_thread_lock;
+
+/* An array of dispatchers for device types, indexed by the type. This array
+ only references "base" devices, and other instances of the same type are
+ found by simply indexing from each such device (which are stored linearly,
+ grouped by device in target.c:devices). */
+static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 };
+
+/* Record DISP as the base dispatcher for its device type.  Descriptors
+   whose target_id is not zero are ignored, so only the 0th device of each
+   type ends up in the dispatchers table.  The table is updated with
+   acc_device_lock held.  */
+attribute_hidden void
+goacc_register (struct gomp_device_descr *disp)
+{
+  if (disp->target_id == 0)
+    {
+      gomp_mutex_lock (&acc_device_lock);
+
+      /* The pseudo device types never get a dispatcher of their own.  */
+      assert (!(acc_device_type (disp->type) == acc_device_none
+                || acc_device_type (disp->type) == acc_device_default
+                || acc_device_type (disp->type) == acc_device_not_host));
+      /* A device type may only be registered once.  */
+      assert (dispatchers[disp->type] == NULL);
+      dispatchers[disp->type] = disp;
+
+      gomp_mutex_unlock (&acc_device_lock);
+    }
+}
+
+/* Translate a device NAME into the name OpenACC uses for it: "nvptx" is
+   known to OpenACC as "nvidia"; every other name is returned unchanged
+   (the very same pointer that was passed in).  */
+
+static const char *
+get_openacc_name (const char *name)
+{
+  return strcmp (name, "nvptx") != 0 ? name : "nvidia";
+}
+
+/* Return the base dispatcher for device type D.
+
+   acc_device_default: when goacc_device_type is set, pick the first
+   registered device type after acc_device_default whose OpenACC name
+   matches it (ignoring case) and which reports at least one device, and
+   abort if there is none.  When it is not set, behave like
+   acc_device_not_host but fall back to acc_device_host instead of aborting.
+
+   acc_device_not_host: pick the first registered device type after
+   acc_device_not_host that reports at least one device, or abort.
+
+   Any other in-range value is looked up directly, so the result is NULL
+   when no dispatcher has been registered for it.  Values at or beyond
+   _ACC_device_hwm abort; acc_device_none trips the assertion below.  */
+static struct gomp_device_descr *
+resolve_device (acc_device_t d)
+{
+  acc_device_t d_arg = d;
+
+  switch (d)
+    {
+    case acc_device_default:
+      {
+        if (goacc_device_type)
+          {
+            /* Lookup the named device.  */
+            while (++d != _ACC_device_hwm)
+              if (dispatchers[d]
+                  && !strcasecmp (goacc_device_type,
+                                  get_openacc_name (dispatchers[d]->name))
+                  && dispatchers[d]->get_num_devices_func () > 0)
+                goto found;
+
+            gomp_fatal ("device type %s not supported", goacc_device_type);
+          }
+
+        /* No default device specified, so start scanning for any non-host
+           device that is available.  */
+        d = acc_device_not_host;
+      }
+      /* FALLTHROUGH */
+
+    case acc_device_not_host:
+      /* Find the first available device after acc_device_not_host.  */
+      while (++d != _ACC_device_hwm)
+        if (dispatchers[d] && dispatchers[d]->get_num_devices_func () > 0)
+          goto found;
+      if (d_arg == acc_device_default)
+        {
+          d = acc_device_host;
+          goto found;
+        }
+      gomp_fatal ("no device found");
+      break;
+
+    case acc_device_host:
+      break;
+
+    default:
+      /* dispatchers has exactly _ACC_device_hwm elements, so
+         _ACC_device_hwm itself is already past the end of the array.  */
+      if (d >= _ACC_device_hwm)
+        gomp_fatal ("device %u out of range", (unsigned)d);
+      break;
+    }
+ found:
+
+  assert (d != acc_device_none
+          && d != acc_device_default
+          && d != acc_device_not_host);
+
+  return dispatchers[d];
+}
+
+/* This is called when plugins have been initialized, and serves to call
+ (indirectly) the target's device_init hook. Calling multiple times without
+ an intervening acc_shutdown_1 call is an error.
+
+ D is resolved to a base dispatcher with resolve_device; the function
+ aborts via gomp_fatal if no dispatcher is found, if the dispatcher reports
+ no devices, or if the device is already initialized. Returns the
+ dispatcher that was initialized. */
+
+static struct gomp_device_descr *
+acc_init_1 (acc_device_t d)
+{
+ struct gomp_device_descr *acc_dev;
+
+ acc_dev = resolve_device (d);
+
+ if (!acc_dev || acc_dev->get_num_devices_func () <= 0)
+ gomp_fatal ("device %u not supported", (unsigned)d);
+
+ if (acc_dev->is_initialized)
+ gomp_fatal ("device already active");
+
+ /* We need to remember what we were initialized as, to check shutdown etc.
+ Note that this is the requested type D, not the type it resolved to. */
+ init_key = d;
+
+ gomp_init_device (acc_dev);
+
+ return acc_dev;
+}
+
+/* Allocate the OpenACC per-thread state for the calling thread, make it
+   reachable through the thread-local slot and the cleanup key, and link it
+   at the head of the goacc_threads list (under goacc_thread_lock).
+   Returns the new structure; this function only sets its NEXT member, the
+   remaining members are left for the caller to fill in.  */
+static struct goacc_thread *
+goacc_new_thread (void)
+{
+  /* Size the allocation by the type that is actually stored in it.  */
+  struct goacc_thread *thr = gomp_malloc (sizeof (struct goacc_thread));
+
+#if defined HAVE_TLS || defined USE_EMUTLS
+  goacc_tls_data = thr;
+#else
+  pthread_setspecific (goacc_tls_key, thr);
+#endif
+
+  pthread_setspecific (goacc_cleanup_key, thr);
+
+  gomp_mutex_lock (&goacc_thread_lock);
+  thr->next = goacc_threads;
+  goacc_threads = thr;
+  gomp_mutex_unlock (&goacc_thread_lock);
+
+  return thr;
+}
+
+/* Tear down the per-thread OpenACC state DATA (a struct goacc_thread *,
+ which may be NULL): release the target-specific TLS data if a base device
+ is still present, unlink the structure from the goacc_threads list and
+ free it. Everything happens with goacc_thread_lock held. */
+static void
+goacc_destroy_thread (void *data)
+{
+ struct goacc_thread *thr = data, *walk, *prev;
+
+ gomp_mutex_lock (&goacc_thread_lock);
+
+ if (thr)
+ {
+ if (base_dev && thr->target_tls)
+ {
+ base_dev->openacc.destroy_thread_data_func (thr->target_tls);
+ thr->target_tls = NULL;
+ }
+
+ /* No data mappings may be outstanding for this thread any more. */
+ assert (!thr->mapped_data);
+
+ /* Remove from thread list. */
+ for (prev = NULL, walk = goacc_threads; walk;
+ prev = walk, walk = walk->next)
+ if (walk == thr)
+ {
+ if (prev == NULL)
+ goacc_threads = walk->next;
+ else
+ prev->next = walk->next;
+
+ free (thr);
+
+ break;
+ }
+
+ /* WALK is only non-null here if the loop was left through the break,
+ i.e. if THR was found on the list. The pointer is merely compared,
+ not dereferenced, after the free above. */
+ assert (walk);
+ }
+
+ gomp_mutex_unlock (&goacc_thread_lock);
+}
+
+/* Open the ORD'th device of the currently-active type (base_dev must be
+ initialised before calling). If ORD is < 0, open the default-numbered
+ device (set by the ACC_DEVICE_NUM environment variable or a call to
+ acc_set_device_num), or leave any currently-opened device as is. "Opening"
+ consists of calling the device's open_device_func hook, and setting up
+ thread-local data (maybe allocating, then initializing with information
+ pertaining to the newly-opened or previously-opened device). */
+
+static void
+lazy_open (int ord)
+{
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev;
+
+ /* This thread already has a device open: nothing to do, but an explicit
+ ORD has to agree with that device. */
+ if (thr && thr->dev)
+ {
+ assert (ord < 0 || ord == thr->dev->target_id);
+ return;
+ }
+
+ assert (base_dev);
+
+ if (ord < 0)
+ ord = goacc_device_num;
+
+ /* The OpenACC 2.0 spec leaves the runtime's behaviour when an out-of-range
+ device is requested as implementation-defined (4.2 ACC_DEVICE_NUM).
+ We choose to raise an error in such a case. */
+ if (ord >= base_dev->get_num_devices_func ())
+ gomp_fatal ("device %u does not exist", ord);
+
+ if (!thr)
+ thr = goacc_new_thread ();
+
+ /* Index from the base device to reach the ORD'th descriptor of the same
+ type; the assertion below checks that this landed on the right one. */
+ acc_dev = thr->dev = &base_dev[ord];
+
+ assert (acc_dev->target_id == ord);
+
+ thr->saved_bound_dev = NULL;
+ thr->mapped_data = NULL;
+
+ /* The device instance is opened only once; later callers reuse the
+ target_data stored in the descriptor. */
+ if (!acc_dev->openacc.target_data)
+ acc_dev->openacc.target_data = acc_dev->openacc.open_device_func (ord);
+
+ thr->target_tls
+ = acc_dev->openacc.create_thread_data_func (acc_dev->openacc.target_data);
+
+ acc_dev->openacc.async_set_async_func (acc_async_sync);
+
+ /* Set up the mapping tables for this device once, under the memory map's
+ own lock. */
+ struct gomp_memory_mapping *mem_map = &acc_dev->mem_map;
+ gomp_mutex_lock (&mem_map->lock);
+ if (!mem_map->is_initialized)
+ gomp_init_tables (acc_dev, mem_map);
+ gomp_mutex_unlock (&mem_map->lock);
+}
+
+/* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
+ init/shutdown is per-process or per-thread. We choose per-process. */
+
+/* Initialize the runtime for device type D: make sure the targets have been
+ set up, then, with acc_device_lock held, initialize the base device via
+ acc_init_1 and open the default-numbered device for the calling thread.
+ acc_init_1 aborts if the device is unsupported or already active. */
+void
+acc_init (acc_device_t d)
+{
+ if (!base_dev)
+ gomp_init_targets_once ();
+
+ gomp_mutex_lock (&acc_device_lock);
+
+ base_dev = acc_init_1 (d);
+
+ lazy_open (-1);
+
+ gomp_mutex_unlock (&acc_device_lock);
+}
+
+ialias (acc_init)
+
+/* Shut down the current base device, which must have been initialized with
+ the same device type D. For every thread on the goacc_threads list this
+ destroys the target-specific TLS data, closes the thread's device and
+ frees its memory map; afterwards the base device is finalized and
+ base_dev is reset to NULL. Aborts via gomp_fatal if no device is
+ initialized, if D differs from the type used at initialization, if a
+ thread still has mapped data, or if closing a device fails. The callers
+ in this file hold acc_device_lock. */
+static void
+acc_shutdown_1 (acc_device_t d)
+{
+ struct goacc_thread *walk;
+
+ /* We don't check whether d matches the actual device found, because
+ OpenACC 2.0 (3.2.12) says the parameters to the init and this
+ call must match (for the shutdown call anyway, it's silent on
+ others). */
+
+ if (!base_dev)
+ gomp_fatal ("no device initialized");
+ if (d != init_key)
+ gomp_fatal ("device %u(%u) is initialized",
+ (unsigned) init_key, (unsigned) base_dev->type);
+
+ gomp_mutex_lock (&goacc_thread_lock);
+
+ /* Free target-specific TLS data and close all devices. */
+ for (walk = goacc_threads; walk != NULL; walk = walk->next)
+ {
+ if (walk->target_tls)
+ base_dev->openacc.destroy_thread_data_func (walk->target_tls);
+
+ walk->target_tls = NULL;
+
+ /* This would mean the user is shutting down OpenACC in the middle of an
+ "acc data" pragma. Likely not intentional. */
+ if (walk->mapped_data)
+ gomp_fatal ("shutdown in 'acc data' region");
+
+ if (walk->dev)
+ {
+ /* NOTE(review): if several threads have the same device open, the
+ close hook runs once per thread, and from the second time on
+ with a NULL target_data -- confirm the hooks tolerate that. */
+ void *target_data = walk->dev->openacc.target_data;
+ if (walk->dev->openacc.close_device_func (target_data) < 0)
+ gomp_fatal ("failed to close device");
+
+ walk->dev->openacc.target_data = target_data = NULL;
+
+ struct gomp_memory_mapping *mem_map = &walk->dev->mem_map;
+ gomp_mutex_lock (&mem_map->lock);
+ gomp_free_memmap (mem_map);
+ gomp_mutex_unlock (&mem_map->lock);
+
+ walk->dev = NULL;
+ }
+ }
+
+ gomp_mutex_unlock (&goacc_thread_lock);
+
+ gomp_fini_device (base_dev);
+
+ base_dev = NULL;
+}
+
+/* Shut down the base device that was initialized with device type D. This
+ is acc_shutdown_1 run with acc_device_lock held; see there for the
+ conditions under which it aborts. */
+void
+acc_shutdown (acc_device_t d)
+{
+ gomp_mutex_lock (&acc_device_lock);
+
+ acc_shutdown_1 (d);
+
+ gomp_mutex_unlock (&acc_device_lock);
+}
+
+ialias (acc_shutdown)
+
+/* Select the "base" device for device type D once plugins have been
+   initialized, preparing the runtime for dealing with a number of such
+   devices (as implemented by some particular plugin).  When D is the type
+   the runtime was last initialized with, the current base device is simply
+   returned; otherwise any active base device is shut down first and the
+   runtime is initialized afresh for D.  */
+
+static struct gomp_device_descr *
+lazy_init (acc_device_t d)
+{
+  /* Same device type as before: keep using the current base device.  */
+  if (base_dev && d == init_key)
+    return base_dev;
+
+  /* A different type is active, so it has to go first.  */
+  if (base_dev)
+    acc_shutdown_1 (init_key);
+
+  assert (!base_dev);
+
+  return acc_init_1 (d);
+}
+
+/* Ensure that plugins are loaded, initialize and open the (default-numbered)
+ device. */
+
+static void
+lazy_init_and_open (acc_device_t d)
+{
+ /* Plugins are only loaded while no base device has been set up. */
+ if (!base_dev)
+ gomp_init_targets_once ();
+
+ /* Initialization and opening both happen with acc_device_lock held. */
+ gomp_mutex_lock (&acc_device_lock);
+
+ base_dev = lazy_init (d);
+
+ /* -1 rather than an explicit device number: the default-numbered device. */
+ lazy_open (-1);
+
+ gomp_mutex_unlock (&acc_device_lock);
+}
+
+/* Return the number of devices of type D, or 0 if D is acc_device_none, if D
+   does not resolve to a device descriptor, or if the plugin reports a
+   negative count.  */
+int
+acc_get_num_devices (acc_device_t d)
+{
+  const struct gomp_device_descr *descr;
+  int count;
+
+  if (d == acc_device_none)
+    return 0;
+
+  if (!base_dev)
+    gomp_init_targets_once ();
+
+  descr = resolve_device (d);
+  if (descr == NULL)
+    return 0;
+
+  count = descr->get_num_devices_func ();
+
+  return count < 0 ? 0 : count;
+}
+
+ialias (acc_get_num_devices)
+
+/* Make D the current device type by delegating to lazy_init_and_open, which
+ initializes the type if needed and opens the default-numbered device. */
+void
+acc_set_device_type (acc_device_t d)
+{
+ lazy_init_and_open (d);
+}
+
+ialias (acc_set_device_type)
+
+/* Return the type of the base device.  If no base device has been set up
+   yet, load the plugins and report the type that acc_device_default
+   resolves to.  */
+acc_device_t
+acc_get_device_type (void)
+{
+  acc_device_t res;
+
+  if (!base_dev)
+    {
+      const struct gomp_device_descr *dev;
+
+      gomp_init_targets_once ();
+
+      dev = resolve_device (acc_device_default);
+      res = acc_device_type (dev->type);
+    }
+  else
+    res = acc_device_type (base_dev->type);
+
+  /* Only concrete device types may be reported to the caller.  */
+  assert (res != acc_device_default
+          && res != acc_device_not_host);
+
+  return res;
+}
+
+ialias (acc_get_device_type)
+
+/* Return the current device number for device type D.  Aborts via gomp_fatal
+   if D is out of range or does not resolve to a device descriptor.  */
+int
+acc_get_device_num (acc_device_t d)
+{
+  const struct gomp_device_descr *descr;
+  int devnum;
+
+  if (d >= _ACC_device_hwm)
+    gomp_fatal ("device %u out of range", (unsigned)d);
+
+  if (!base_dev)
+    gomp_init_targets_once ();
+
+  descr = resolve_device (d);
+  if (descr == NULL)
+    gomp_fatal ("no devices of type %u", d);
+
+  /* We might not have called lazy_open for this host thread yet, in which
+     case the get_device_num_func hook will return -1; fall back to the
+     global goacc_device_num then.  */
+  devnum = descr->openacc.get_device_num_func ();
+
+  return devnum < 0 ? goacc_device_num : devnum;
+}
+
+ialias (acc_get_device_num)
+
+/* Select device number N for device type D.
+
+   If D is zero, N is applied to every already-initialized device type after
+   acc_device_not_host, and is remembered in goacc_device_num for later
+   initializations.  Otherwise device type D is (lazily) initialized, N is
+   range-checked against the number of devices of that type, and device N is
+   opened for the calling thread.  */
+void
+acc_set_device_num (int n, acc_device_t d)
+{
+  const struct gomp_device_descr *dev;
+  int num_devices;
+
+  if (!base_dev)
+    gomp_init_targets_once ();
+
+  if ((int) d == 0)
+    {
+      int i;
+
+      /* A device setting of zero sets all device types on the system to use
+         the Nth instance of that device type.  Only attempt it for
+         initialized devices though.  */
+      for (i = acc_device_not_host + 1; i < _ACC_device_hwm; i++)
+        {
+          /* Resolve the device type being visited (I).  D is always zero on
+             this path, so resolving D would never look at the individual
+             device types the loop is iterating over.  */
+          dev = resolve_device ((acc_device_t) i);
+          if (dev && dev->is_initialized)
+            dev->openacc.set_device_num_func (n);
+        }
+
+      /* ...and for future calls to acc_init/acc_set_device_type, etc.  */
+      goacc_device_num = n;
+    }
+  else
+    {
+      struct goacc_thread *thr = goacc_thread ();
+
+      gomp_mutex_lock (&acc_device_lock);
+
+      base_dev = lazy_init (d);
+
+      num_devices = base_dev->get_num_devices_func ();
+
+      if (n >= num_devices)
+        gomp_fatal ("device %u out of range", n);
+
+      /* If we're changing the device number, de-associate this thread with
+         the device (but don't close the device, since it may be in use by
+         other threads).  */
+      if (thr && thr->dev && n != thr->dev->target_id)
+        thr->dev = NULL;
+
+      lazy_open (n);
+
+      gomp_mutex_unlock (&acc_device_lock);
+    }
+}
+
+ialias (acc_set_device_num)
+
+/* Return nonzero if the caller is executing on a device of type DEV.  When
+   the current thread is bound to the non-shared-memory host device the
+   answer is computed here; otherwise it is left to the compiler builtin.  */
+int
+acc_on_device (acc_device_t dev)
+{
+  struct goacc_thread *thr = goacc_thread ();
+  int on_host_nonshm
+    = (thr && thr->dev
+       && acc_device_type (thr->dev->type) == acc_device_host_nonshm);
+
+  if (!on_host_nonshm)
+    /* Just rely on the compiler builtin.  */
+    return __builtin_acc_on_device (dev);
+
+  return dev == acc_device_host_nonshm || dev == acc_device_not_host;
+}
+
+ialias (acc_on_device)
+
+/* Set up the global state of the OpenACC runtime: the device lock, the
+ pthread keys, the base device pointer, and the thread list with its lock. */
+attribute_hidden void
+goacc_runtime_initialize (void)
+{
+ gomp_mutex_init (&acc_device_lock);
+
+ /* Without compiler TLS support a pthread key is created for the per-thread
+ data instead. */
+#if !(defined HAVE_TLS || defined USE_EMUTLS)
+ pthread_key_create (&goacc_tls_key, NULL);
+#endif
+
+ /* goacc_destroy_thread is registered as the destructor of this key. */
+ pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread);
+
+ base_dev = NULL;
+
+ goacc_threads = NULL;
+ gomp_mutex_init (&goacc_thread_lock);
+}
+
+/* Compiler helper functions */
+
+/* Remember the calling thread's current device in saved_bound_dev and bind
+ the thread to the dispatcher for device type D instead. Asserts that no
+ device is already saved, i.e. calls do not nest.
+ NOTE(review): the result of goacc_thread () is dereferenced unchecked, so
+ the thread presumably must already have OpenACC thread data -- confirm. */
+attribute_hidden void
+goacc_save_and_set_bind (acc_device_t d)
+{
+ struct goacc_thread *thr = goacc_thread ();
+
+ assert (!thr->saved_bound_dev);
+
+ thr->saved_bound_dev = thr->dev;
+ thr->dev = dispatchers[d];
+}
+
+/* Rebind the calling thread to the device stored in saved_bound_dev and clear
+ the saved slot. */
+attribute_hidden void
+goacc_restore_bind (void)
+{
+ struct goacc_thread *thr = goacc_thread ();
+
+ thr->dev = thr->saved_bound_dev;
+ thr->saved_bound_dev = NULL;
+}
+
+/* This is called from any OpenACC support function that may need to implicitly
+ initialize the libgomp runtime. On exit all such initialization will have
+ been done, and both the global ACC_dev and the per-host-thread ACC_memmap
+ pointers will be valid. */
+
+attribute_hidden void
+goacc_lazy_initialize (void)
+{
+  struct goacc_thread *thr = goacc_thread ();
+
+  /* This thread already has a device: nothing to do.  */
+  if (thr && thr->dev)
+    return;
+
+  if (base_dev)
+    {
+      /* The runtime is initialized; only this thread still needs a device
+         opened for it.  */
+      gomp_mutex_lock (&acc_device_lock);
+      lazy_open (-1);
+      gomp_mutex_unlock (&acc_device_lock);
+      return;
+    }
+
+  /* Nothing initialized at all: set up and open the default device type.  */
+  lazy_init_and_open (acc_device_default);
+}
diff --git a/libgomp/oacc-int.h b/libgomp/oacc-int.h
new file mode 100644
index 0000000..85619c8
--- /dev/null
+++ b/libgomp/oacc-int.h
@@ -0,0 +1,105 @@
+/* OpenACC Runtime - internal declarations
+
+ Copyright (C) 2013-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains data types and function declarations that are not
+ part of the official OpenACC user interface. There are declarations
+ in here that are part of the GNU OpenACC ABI, in that the compiler is
+ required to know about them and use them.
+
+ The convention is that the all caps prefix "GOACC" is used to group items
+ that are part of the external ABI, and the lower case prefix "goacc"
+ is used to group items that are completely private to the library. */
+
+#ifndef OACC_INT_H
+#define OACC_INT_H 1
+
+#include "openacc.h"
+#include "config.h"
+#include <stddef.h>
+#include <stdbool.h>
+#include <stdarg.h>
+
+#ifdef HAVE_ATTRIBUTE_VISIBILITY
+# pragma GCC visibility push(hidden)
+#endif
+
+/* Convert an offload target type to an OpenACC device type with a plain cast.
+ NOTE(review): this presumably relies on the two enumerations sharing
+ numeric values -- confirm against their definitions. */
+static inline enum acc_device_t
+acc_device_type (enum offload_target_type type)
+{
+ return (enum acc_device_t) type;
+}
+
+/* Per-thread OpenACC runtime state. */
+struct goacc_thread
+{
+ /* The device for the current thread. */
+ struct gomp_device_descr *dev;
+
+ /* Device saved by goacc_save_and_set_bind while the thread is temporarily
+ bound to another device; put back by goacc_restore_bind. */
+ struct gomp_device_descr *saved_bound_dev;
+
+ /* This is a linked list of data mapped by the "acc data" pragma, following
+ strictly push/pop semantics according to lexical scope. */
+ struct target_mem_desc *mapped_data;
+
+ /* These structures form a list: this is the next thread in that list. */
+ struct goacc_thread *next;
+
+ /* Target-specific data (used by plugin). */
+ void *target_tls;
+};
+
+/* goacc_thread returns the calling thread's struct goacc_thread. With TLS
+ support it reads the thread-local goacc_tls_data; otherwise it reads the
+ value stored under the pthread key goacc_tls_key. */
+#if defined HAVE_TLS || defined USE_EMUTLS
+extern __thread struct goacc_thread *goacc_tls_data;
+static inline struct goacc_thread *
+goacc_thread (void)
+{
+ return goacc_tls_data;
+}
+#else
+extern pthread_key_t goacc_tls_key;
+static inline struct goacc_thread *
+goacc_thread (void)
+{
+ return pthread_getspecific (goacc_tls_key);
+}
+#endif
+
+void goacc_register (struct gomp_device_descr *) __GOACC_NOTHROW;
+
+/* Current dispatcher. */
+extern struct gomp_device_descr *base_dev;
+
+void goacc_runtime_initialize (void);
+void goacc_save_and_set_bind (acc_device_t);
+void goacc_restore_bind (void);
+void goacc_lazy_initialize (void);
+
+#ifdef HAVE_ATTRIBUTE_VISIBILITY
+# pragma GCC visibility pop
+#endif
+
+#endif
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
new file mode 100644
index 0000000..0096d51
--- /dev/null
+++ b/libgomp/oacc-mem.c
@@ -0,0 +1,585 @@
+/* OpenACC Runtime initialization routines
+
+ Copyright (C) 2013-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "openacc.h"
+#include "config.h"
+#include "libgomp.h"
+#include "gomp-constants.h"
+#include "oacc-int.h"
+#include "splay-tree.h"
+#include <stdint.h>
+#include <assert.h>
+
+/* Return block containing [H->S), or NULL if not contained. */
+
+static splay_tree_key
+lookup_host (struct gomp_memory_mapping *mem_map, void *h, size_t s)
+{
+  struct splay_tree_key_s probe;
+  splay_tree_key found;
+  uintptr_t start = (uintptr_t) h;
+
+  /* Build a search key covering the host range [H, H + S).  */
+  probe.host_start = start;
+  probe.host_end = start + s;
+
+  /* The splay tree is only touched with the mapping lock held.  */
+  gomp_mutex_lock (&mem_map->lock);
+  found = splay_tree_lookup (&mem_map->splay_tree, &probe);
+  gomp_mutex_unlock (&mem_map->lock);
+
+  return found;
+}
+
+/* Return block containing [D->S), or NULL if not contained.
+ The list isn't ordered by device address, so we have to iterate
+ over the whole array. This is not expected to be a common
+ operation. */
+
+static splay_tree_key
+lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
+{
+ int i;
+ struct target_mem_desc *t;
+ struct gomp_memory_mapping *mem_map;
+
+ if (!tgt)
+ return NULL;
+
+ mem_map = tgt->mem_map;
+
+ /* The lock is held only while walking the descriptor chain. */
+ gomp_mutex_lock (&mem_map->lock);
+
+ /* Find the first descriptor whose device range fully contains [D, D + S). */
+ for (t = tgt; t != NULL; t = t->prev)
+ {
+ if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
+ break;
+ }
+
+ gomp_mutex_unlock (&mem_map->lock);
+
+ if (!t)
+ return NULL;
+
+ /* Pick the first key of that descriptor that passes the range test below.
+ The arithmetic mixes void * and uintptr_t operands.
+ NOTE(review): OFFSET adds k->tgt_offset rather than subtracting it, and S
+ is not part of the test -- confirm this is the intended containment
+ check. */
+ for (i = 0; i < t->list_count; i++)
+ {
+ void * offset;
+
+ splay_tree_key k = &t->array[i].key;
+ offset = d - t->tgt_start + k->tgt_offset;
+
+ if (k->host_start + offset <= (void *) k->host_end)
+ return k;
+ }
+
+ return NULL;
+}
+
+/* OpenACC is silent on how memory exhaustion is indicated. We return
+ NULL. */
+
+void *
+acc_malloc (size_t s)
+{
+  struct goacc_thread *thr;
+
+  /* A zero-sized request yields NULL without touching the runtime.  */
+  if (s == 0)
+    return NULL;
+
+  goacc_lazy_initialize ();
+  thr = goacc_thread ();
+
+  /* Allocate S bytes on the device the calling thread is bound to.  */
+  return base_dev->alloc_func (thr->dev->target_id, s);
+}
+
+/* Free device memory D. OpenACC 2.0a (3.2.16) doesn't specify what to do in
+ the event the device address is mapped. We choose to check if it is
+ mapped, and if it is, to unmap it. */
+void
+acc_free (void *d)
+{
+ splay_tree_key k;
+ struct goacc_thread *thr = goacc_thread ();
+
+ if (!d)
+ return;
+
+ /* We don't have to call lazy open here, as the ptr value must have
+ been returned by acc_malloc. It's not permitted to pass NULL in
+ (unless you got that null from acc_malloc). */
+ if ((k = lookup_dev (thr->dev->openacc.data_environ, d, 1)))
+ {
+ void *offset;
+
+ /* Derive a host address from the device address and unmap it first.
+ NOTE(review): the same offset formula as in lookup_dev and
+ acc_hostptr; confirm the sign of k->tgt_offset. */
+ offset = d - k->tgt->tgt_start + k->tgt_offset;
+
+ acc_unmap_data ((void *)(k->host_start + offset));
+ }
+
+ base_dev->free_func (thr->dev->target_id, d);
+}
+
+/* Copy S bytes from host address H to device address D on the calling
+ thread's device, using the base device's host2dev hook. */
+void
+acc_memcpy_to_device (void *d, void *h, size_t s)
+{
+ /* No need to call lazy open here, as the device pointer must have
+ been obtained from a routine that did that. */
+ struct goacc_thread *thr = goacc_thread ();
+
+ base_dev->host2dev_func (thr->dev->target_id, d, h, s);
+}
+
+/* Copy S bytes from device address D to host address H on the calling
+ thread's device, using the base device's dev2host hook. */
+void
+acc_memcpy_from_device (void *h, void *d, size_t s)
+{
+ /* No need to call lazy open here, as the device pointer must have
+ been obtained from a routine that did that. */
+ struct goacc_thread *thr = goacc_thread ();
+
+ base_dev->dev2host_func (thr->dev->target_id, h, d, s);
+}
+
+/* Return the device pointer that corresponds to host data H. Or NULL
+ if no mapping. */
+
+void *
+acc_deviceptr (void *h)
+{
+ splay_tree_key n;
+ void *d;
+ void *offset;
+
+ goacc_lazy_initialize ();
+
+ struct goacc_thread *thr = goacc_thread ();
+
+ /* Look up the mapping containing the single byte at H. */
+ n = lookup_host (&thr->dev->mem_map, h, 1);
+
+ if (!n)
+ return NULL;
+
+ /* Offset of H within the mapped host block (void * arithmetic). */
+ offset = h - n->host_start;
+
+ /* Device address of the block start, plus that offset. */
+ d = n->tgt->tgt_start + n->tgt_offset + offset;
+
+ return d;
+}
+
+/* Return the host pointer that corresponds to device data D. Or NULL
+ if no mapping. */
+
+void *
+acc_hostptr (void *d)
+{
+ splay_tree_key n;
+ void *h;
+ void *offset;
+
+ goacc_lazy_initialize ();
+
+ struct goacc_thread *thr = goacc_thread ();
+
+ /* Search the device's data environment for the single byte at D. */
+ n = lookup_dev (thr->dev->openacc.data_environ, d, 1);
+
+ if (!n)
+ return NULL;
+
+ /* NOTE(review): acc_deviceptr computes tgt_start + tgt_offset + offset, so
+ the inverse would subtract n->tgt_offset here, but it is added --
+ confirm. */
+ offset = d - n->tgt->tgt_start + n->tgt_offset;
+
+ h = n->host_start + offset;
+
+ return h;
+}
+
+/* Return 1 if host data [H,+S] is present on the device. */
+
+int
+acc_is_present (void *h, size_t s)
+{
+  splay_tree_key n;
+  struct goacc_thread *thr;
+  struct gomp_device_descr *acc_dev;
+  uintptr_t start;
+
+  /* A null pointer or an empty range is never present.  */
+  if (!s || !h)
+    return 0;
+
+  goacc_lazy_initialize ();
+
+  thr = goacc_thread ();
+  acc_dev = thr->dev;
+
+  n = lookup_host (&acc_dev->mem_map, h, s);
+  if (n == NULL)
+    return 0;
+
+  /* The lookup may return a block that does not fully contain the range,
+     so check containment explicitly.  */
+  start = (uintptr_t) h;
+  if (start < n->host_start
+      || start + s > n->host_end
+      || s > n->host_end - n->host_start)
+    return 0;
+
+  return 1;
+}
+
+/* Create a mapping for host [H,+S] -> device [D,+S] */
+
+void
+acc_map_data (void *h, void *d, size_t s)
+{
+  struct target_mem_desc *tgt;
+  struct goacc_thread *thr;
+  struct gomp_device_descr *acc_dev;
+
+  goacc_lazy_initialize ();
+
+  thr = goacc_thread ();
+  acc_dev = thr->dev;
+
+  if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
+    {
+      /* Single-entry argument vectors for gomp_map_vars.  */
+      size_t mapnum = 1;
+      void *hostaddrs = h;
+      void *devaddrs = d;
+      size_t sizes = s;
+      unsigned short kinds = GOMP_MAP_ALLOC;
+
+      if (!d || !h || !s)
+        gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
+                    (void *)h, (int)s, (void *)d, (int)s);
+
+      /* Neither side of the new mapping may already be mapped.  */
+      if (lookup_host (&acc_dev->mem_map, h, s))
+        gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
+                    (int)s);
+
+      if (lookup_dev (acc_dev->openacc.data_environ, d, s))
+        gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
+                    (int)s);
+
+      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
+                           &kinds, true, false);
+    }
+  else
+    {
+      /* With shared memory only the identity mapping is accepted, and an
+         empty descriptor is created for it.  */
+      if (d != h)
+        gomp_fatal ("cannot map data on shared-memory system");
+
+      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
+    }
+
+  /* Push the new descriptor onto the device's data environment.  */
+  tgt->prev = acc_dev->openacc.data_environ;
+  acc_dev->openacc.data_environ = tgt;
+}
+
+/* Remove the mapping whose host block starts exactly at H. Aborts via
+ gomp_fatal if H is not mapped or is not the start of its block. */
+void
+acc_unmap_data (void *h)
+{
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
+
+ /* No need to call lazy open, as the address must have been mapped. */
+
+ size_t host_size;
+ splay_tree_key n = lookup_host (&acc_dev->mem_map, h, 1);
+ struct target_mem_desc *t;
+
+ if (!n)
+ gomp_fatal ("%p is not a mapped block", (void *)h);
+
+ host_size = n->host_end - n->host_start;
+
+ if (n->host_start != (uintptr_t) h)
+ gomp_fatal ("[%p,%d] surrounds1 %p",
+ (void *) n->host_start, (int) host_size, (void *) h);
+
+ t = n->tgt;
+
+ if (t->refcount == 2)
+ {
+ struct target_mem_desc *tp;
+
+ /* This is the last reference, so pull the descriptor off the
+ chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
+ freeing the device memory. */
+ t->tgt_end = 0;
+ t->to_free = 0;
+
+ gomp_mutex_lock (&acc_dev->mem_map.lock);
+
+ /* NOTE(review): T is reused as the loop cursor. If n->tgt is not on
+ the data_environ chain the loop ends with T == NULL, and that NULL is
+ then passed to gomp_unmap_vars below -- confirm this cannot happen. */
+ for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
+ tp = t, t = t->prev)
+ if (n->tgt == t)
+ {
+ if (tp)
+ tp->prev = t->prev;
+ else
+ acc_dev->openacc.data_environ = t->prev;
+
+ break;
+ }
+
+ gomp_mutex_unlock (&acc_dev->mem_map.lock);
+ }
+
+ gomp_unmap_vars (t, true);
+}
+
+#define FLAG_PRESENT (1 << 0)
+#define FLAG_CREATE (1 << 1)
+#define FLAG_COPY (1 << 2)
+
+/* Common worker for acc_create, acc_copyin and their present_or_ variants.
+ F is a mask of FLAG_PRESENT (an existing mapping is acceptable),
+ FLAG_CREATE (a missing mapping may be created) and FLAG_COPY (a new
+ mapping is created with GOMP_MAP_TO instead of GOMP_MAP_ALLOC). Returns a
+ device address for the host range [H, H + S); aborts via gomp_fatal when
+ the request cannot be satisfied. */
+static void *
+present_create_copy (unsigned f, void *h, size_t s)
+{
+ void *d;
+ splay_tree_key n;
+
+ if (!h || !s)
+ gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);
+
+ goacc_lazy_initialize ();
+
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
+
+ n = lookup_host (&acc_dev->mem_map, h, s);
+ if (n)
+ {
+ /* Present. */
+ /* NOTE(review): this is the device address of the start of the mapped
+ block; unlike acc_deviceptr, no offset of H within the block is added
+ -- confirm that is intended when H is not the block start. */
+ d = (void *) (n->tgt->tgt_start + n->tgt_offset);
+
+ if (!(f & FLAG_PRESENT))
+ gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
+ (void *)h, (int)s, (void *)d, (int)s);
+ /* The requested range must not run past the end of the block. */
+ if ((h + s) > (void *)n->host_end)
+ gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
+ }
+ else if (!(f & FLAG_CREATE))
+ {
+ gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
+ }
+ else
+ {
+ struct target_mem_desc *tgt;
+ size_t mapnum = 1;
+ unsigned short kinds;
+ void *hostaddrs = h;
+
+ if (f & FLAG_COPY)
+ kinds = GOMP_MAP_TO;
+ else
+ kinds = GOMP_MAP_ALLOC;
+
+ tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
+ false);
+
+ /* Push the new descriptor onto the device's data environment with the
+ mapping lock held. */
+ gomp_mutex_lock (&acc_dev->mem_map.lock);
+
+ d = tgt->to_free;
+ tgt->prev = acc_dev->openacc.data_environ;
+ acc_dev->openacc.data_environ = tgt;
+
+ gomp_mutex_unlock (&acc_dev->mem_map.lock);
+ }
+
+ return d;
+}
+
+/* Create a mapping for [H, H + S); it is a fatal error if one exists. */
+void *
+acc_create (void *h, size_t s)
+{
+ return present_create_copy (FLAG_CREATE, h, s);
+}
+
+/* As acc_create, but the new mapping is created with GOMP_MAP_TO. */
+void *
+acc_copyin (void *h, size_t s)
+{
+ return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
+}
+
+/* Use an existing mapping for [H, H + S), or create one if there is none. */
+void *
+acc_present_or_create (void *h, size_t s)
+{
+ return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
+}
+
+/* Use an existing mapping for [H, H + S), or create one with GOMP_MAP_TO if
+ there is none. */
+void *
+acc_present_or_copyin (void *h, size_t s)
+{
+ return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
+}
+
+#define FLAG_COPYOUT (1 << 0)
+
+/* Common worker for acc_delete and acc_copyout. [H, H + S) must match a
+ mapped block exactly. If F contains FLAG_COPYOUT the device data is copied
+ back to the host first; then the block is unmapped and its device memory
+ freed. */
+static void
+delete_copyout (unsigned f, void *h, size_t s)
+{
+ size_t host_size;
+ splay_tree_key n;
+ void *d;
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
+
+ n = lookup_host (&acc_dev->mem_map, h, s);
+
+ /* No need to call lazy open, as the data must already have been
+ mapped. */
+
+ if (!n)
+ gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
+
+ /* D is computed before unmapping, as it is still needed for the free. */
+ d = (void *) (n->tgt->tgt_start + n->tgt_offset);
+
+ host_size = n->host_end - n->host_start;
+
+ if (n->host_start != (uintptr_t) h || host_size != s)
+ gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
+ (void *) n->host_start, (int) host_size, (void *) h, (int) s);
+
+ if (f & FLAG_COPYOUT)
+ acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
+
+ acc_unmap_data (h);
+
+ acc_dev->free_func (acc_dev->target_id, d);
+}
+
+/* Unmap [H, H + S) and free its device memory without copying back. */
+void
+acc_delete (void *h , size_t s)
+{
+ delete_copyout (0, h, s);
+}
+
+/* Copy [H, H + S) back to the host, then unmap it and free its device
+ memory. */
+void acc_copyout (void *h, size_t s)
+{
+ delete_copyout (FLAG_COPYOUT, h, s);
+}
+
+/* Common worker for acc_update_device and acc_update_self.  Transfer S bytes
+   between host address H and the corresponding device address: host to
+   device if IS_DEV is nonzero, device to host otherwise.  Aborts via
+   gomp_fatal if the host range is not mapped.  */
+static void
+update_dev_host (int is_dev, void *h, size_t s)
+{
+  splay_tree_key n;
+  void *d;
+  struct goacc_thread *thr = goacc_thread ();
+  struct gomp_device_descr *acc_dev = thr->dev;
+
+  n = lookup_host (&acc_dev->mem_map, h, s);
+
+  /* No need to call lazy open, as the data must already have been
+     mapped.  */
+
+  if (!n)
+    gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
+
+  /* H need not be the start of the mapped block, so carry its offset within
+     the block over to the device side (as acc_deviceptr does).  Without the
+     offset, updating a sub-range would transfer to or from the beginning of
+     the device block instead.  */
+  d = (void *) (n->tgt->tgt_start + n->tgt_offset
+                + (uintptr_t) h - n->host_start);
+
+  if (is_dev)
+    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
+  else
+    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
+}
+
+/* Copy the host range [H, H + S) to its device counterpart. */
+void
+acc_update_device (void *h, size_t s)
+{
+ update_dev_host (1, h, s);
+}
+
+/* Copy the device counterpart of [H, H + S) back to the host. */
+void
+acc_update_self (void *h, size_t s)
+{
+ update_dev_host (0, h, s);
+}
+
+/* Map the MAPNUM entries described by HOSTADDRS, SIZES and KINDS on the
+ calling thread's device and push the resulting descriptor onto the
+ device's data environment.
+ NOTE(review): unlike present_create_copy, the chain is updated here
+ without taking mem_map.lock -- confirm whether that is intentional. */
+void
+gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
+ void *kinds)
+{
+ struct target_mem_desc *tgt;
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
+
+ gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
+ tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
+ NULL, sizes, kinds, true, false);
+ gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
+ tgt->prev = acc_dev->openacc.data_environ;
+ acc_dev->openacc.data_environ = tgt;
+}
+
+/* Undo a mapping containing host address H. MAPNUM selects the reference
+ count at which the descriptor counts as holding its last reference (2 for
+ a single mapping, 3 otherwise). If FORCE_COPYFROM is set, the first list
+ entry is flagged for copy-back. ASYNC selects between immediate unmapping
+ and deferred cleanup through the device's async hooks. */
+void
+gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
+{
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
+ splay_tree_key n;
+ struct target_mem_desc *t;
+ int minrefs = (mapnum == 1) ? 2 : 3;
+
+ n = lookup_host (&acc_dev->mem_map, h, 1);
+
+ if (!n)
+ gomp_fatal ("%p is not a mapped block", (void *)h);
+
+ gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
+
+ t = n->tgt;
+
+ struct target_mem_desc *tp;
+
+ gomp_mutex_lock (&acc_dev->mem_map.lock);
+
+ if (t->refcount == minrefs)
+ {
+ /* This is the last reference, so pull the descriptor off the
+ chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
+ freeing the device memory. */
+ t->tgt_end = 0;
+ t->to_free = 0;
+
+ /* NOTE(review): T is reused as the loop cursor; if n->tgt is not found
+ on the chain, T is NULL afterwards and is dereferenced below --
+ confirm this cannot happen. */
+ for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
+ tp = t, t = t->prev)
+ {
+ if (n->tgt == t)
+ {
+ if (tp)
+ tp->prev = t->prev;
+ else
+ acc_dev->openacc.data_environ = t->prev;
+ break;
+ }
+ }
+ }
+
+ if (force_copyfrom)
+ t->list[0]->copy_from = 1;
+
+ gomp_mutex_unlock (&acc_dev->mem_map.lock);
+
+ /* If running synchronously, unmap immediately. */
+ if (async < acc_async_noval)
+ gomp_unmap_vars (t, true);
+ else
+ {
+ gomp_copy_from_async (t);
+ acc_dev->openacc.register_async_cleanup_func (t);
+ }
+
+ gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
+}
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
new file mode 100644
index 0000000..6d5386b
--- /dev/null
+++ b/libgomp/oacc-parallel.c
@@ -0,0 +1,490 @@
+/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file handles OpenACC constructs. */
+
+#include "openacc.h"
+#include "libgomp.h"
+#include "libgomp_g.h"
+#include "gomp-constants.h"
+#include "oacc-int.h"
+#include <string.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <alloca.h>
+
+/* Return nonzero if the mapping that follows position POS in KINDS (an array
+   of MAPNUM entries) is a GOMP_MAP_TO_PSET; return zero if it is not, or if
+   POS is the last entry.  */
+static int
+find_pset (int pos, size_t mapnum, unsigned short *kinds)
+{
+  unsigned char next_kind;
+
+  if (pos + 1 >= mapnum)
+    return 0;
+
+  /* Only the low byte of a kind encodes the mapping type.  */
+  next_kind = kinds[pos + 1] & 0xff;
+
+  return next_kind == GOMP_MAP_TO_PSET;
+}
+
+
+/* Ensure that the target device for DEVICE_TYPE is initialised (and that
+ plugins have been loaded if appropriate). The ACC_dev variable for the
+ current thread will be set appropriately for the given device type on
+ return. */
+
+attribute_hidden void
+select_acc_device (int device_type)
+{
+  goacc_lazy_initialize ();
+
+  /* Host fallback keeps whatever device the thread already has.  */
+  if (device_type == GOMP_DEVICE_HOST_FALLBACK)
+    return;
+
+  /* "No device" is treated as a request for the host device.  */
+  if (device_type == acc_device_none)
+    device_type = acc_device_host;
+
+  /* Other negative values do not name a device type: nothing to select.  */
+  if (device_type < 0)
+    return;
+
+  /* NOTE: this will go badly if the surrounding data environment is set up
+     to use a different device type.  We'll just have to trust that users
+     know what they're doing...  */
+  acc_set_device_type (device_type);
+}
+
+static void goacc_wait (int async, int num_waits, va_list ap);
+
+/* Run the outlined region FN for an OpenACC parallel construct. DEVICE
+ selects the device type (or host fallback); MAPNUM, HOSTADDRS, SIZES and
+ KINDS describe the data to map; ASYNC is the async queue; NUM_WAITS
+ variadic int arguments name queues to wait on first. Only num_gangs == 1
+ and num_workers == 1 are accepted. OFFLOAD_TABLE is not used here. */
+void
+GOACC_parallel (int device, void (*fn) (void *), const void *offload_table,
+ size_t mapnum, void **hostaddrs, size_t *sizes,
+ unsigned short *kinds,
+ int num_gangs, int num_workers, int vector_length,
+ int async, int num_waits, ...)
+{
+ bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
+ va_list ap;
+ struct goacc_thread *thr;
+ struct gomp_device_descr *acc_dev;
+ struct target_mem_desc *tgt;
+ void **devaddrs;
+ unsigned int i;
+ struct splay_tree_key_s k;
+ splay_tree_key tgt_fn_key;
+ void (*tgt_fn);
+
+ if (num_gangs != 1)
+ gomp_fatal ("num_gangs (%d) different from one is not yet supported",
+ num_gangs);
+ if (num_workers != 1)
+ gomp_fatal ("num_workers (%d) different from one is not yet supported",
+ num_workers);
+
+ gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
+ __FUNCTION__, mapnum, hostaddrs, sizes, kinds, async);
+
+ select_acc_device (device);
+
+ thr = goacc_thread ();
+ acc_dev = thr->dev;
+
+ /* Host fallback if "if" clause is false or if the current device is set to
+ the host. */
+ if (host_fallback)
+ {
+ /* Run FN on the host with the thread temporarily bound to the host
+ device. */
+ goacc_save_and_set_bind (acc_device_host);
+ fn (hostaddrs);
+ goacc_restore_bind ();
+ return;
+ }
+ else if (acc_device_type (acc_dev->type) == acc_device_host)
+ {
+ fn (hostaddrs);
+ return;
+ }
+
+ va_start (ap, num_waits);
+
+ if (num_waits > 0)
+ goacc_wait (async, num_waits, ap);
+
+ va_end (ap);
+
+ acc_dev->openacc.async_set_async_func (async);
+
+ /* Without native execution, FN's host address is looked up in the device's
+ mapping table to find the target function. */
+ if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
+ {
+ k.host_start = (uintptr_t) fn;
+ k.host_end = k.host_start + 1;
+ gomp_mutex_lock (&acc_dev->mem_map.lock);
+ tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map.splay_tree, &k);
+ gomp_mutex_unlock (&acc_dev->mem_map.lock);
+
+ if (tgt_fn_key == NULL)
+ gomp_fatal ("target function wasn't mapped");
+
+ tgt_fn = (void (*)) tgt_fn_key->tgt->tgt_start;
+ }
+ else
+ tgt_fn = (void (*)) fn;
+
+ tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
+ false);
+
+ /* Collect the device address of every mapped entry for the exec hook. */
+ devaddrs = alloca (sizeof (void *) * mapnum);
+ for (i = 0; i < mapnum; i++)
+ devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
+ + tgt->list[i]->tgt_offset);
+
+ acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
+ num_gangs, num_workers, vector_length, async,
+ tgt);
+
+ /* If running synchronously, unmap immediately. */
+ if (async < acc_async_noval)
+ gomp_unmap_vars (tgt, true);
+ else
+ {
+ gomp_copy_from_async (tgt);
+ acc_dev->openacc.register_async_cleanup_func (tgt);
+ }
+
+ /* Put the device's async setting back to synchronous. */
+ acc_dev->openacc.async_set_async_func (acc_async_sync);
+}
+
+/* Enter an OpenACC data region: map the MAPNUM entries described by
+   HOSTADDRS, SIZES and KINDS and push the resulting descriptor onto the
+   calling thread's mapped_data stack.  For host fallback or a shared-memory
+   device an empty descriptor is pushed instead.  */
+void
+GOACC_data_start (int device, const void *offload_table, size_t mapnum,
+                  void **hostaddrs, size_t *sizes, unsigned short *kinds)
+{
+  bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
+  struct target_mem_desc *tgt;
+  struct goacc_thread *thr;
+  struct gomp_device_descr *acc_dev;
+
+  gomp_debug (0, "%s: mapnum=%zd, hostaddrs=%p, sizes=%p, kinds=%p\n",
+              __FUNCTION__, mapnum, hostaddrs, sizes, kinds);
+
+  select_acc_device (device);
+
+  thr = goacc_thread ();
+  acc_dev = thr->dev;
+
+  if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+      || host_fallback)
+    /* Host fallback or 'do nothing'.  */
+    tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
+  else
+    {
+      gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
+      tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds,
+                           true, false);
+      gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
+    }
+
+  /* Either way, the descriptor becomes the innermost data region.  */
+  tgt->prev = thr->mapped_data;
+  thr->mapped_data = tgt;
+}
+
+/* Leave the innermost OpenACC data region: pop the top descriptor from the
+ calling thread's mapped_data stack and unmap it.
+ NOTE(review): the top descriptor is dereferenced unchecked, so a matching
+ GOACC_data_start is presumably required -- confirm. */
+void
+GOACC_data_end (void)
+{
+ struct goacc_thread *thr = goacc_thread ();
+ struct target_mem_desc *tgt = thr->mapped_data;
+
+ gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
+ thr->mapped_data = tgt->prev;
+ gomp_unmap_vars (tgt, true);
+ gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
+}
+
+/* Implement the OpenACC "enter data" and "exit data" directives. Whether the
+ call is an enter or an exit is derived from the first mapping kind that is
+ neither a pointer nor a pointer set. Does nothing for host fallback or a
+ shared-memory device. NUM_WAITS variadic int arguments name async queues
+ to wait on first. OFFLOAD_TABLE is not used here. */
+void
+GOACC_enter_exit_data (int device, const void *offload_table, size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned short *kinds,
+ int async, int num_waits, ...)
+{
+ struct goacc_thread *thr;
+ struct gomp_device_descr *acc_dev;
+ bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
+ bool data_enter = false;
+ size_t i;
+
+ select_acc_device (device);
+
+ thr = goacc_thread ();
+ acc_dev = thr->dev;
+
+ if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ || host_fallback)
+ return;
+
+ if (num_waits > 0)
+ {
+ va_list ap;
+
+ va_start (ap, num_waits);
+
+ goacc_wait (async, num_waits, ap);
+
+ va_end (ap);
+ }
+
+ acc_dev->openacc.async_set_async_func (async);
+
+ /* Determine if this is an "acc enter data". */
+ for (i = 0; i < mapnum; ++i)
+ {
+ unsigned char kind = kinds[i] & 0xff;
+
+ if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
+ continue;
+
+ if (kind == GOMP_MAP_FORCE_ALLOC
+ || kind == GOMP_MAP_FORCE_PRESENT
+ || kind == GOMP_MAP_FORCE_TO)
+ {
+ data_enter = true;
+ break;
+ }
+
+ if (kind == GOMP_MAP_FORCE_DEALLOC
+ || kind == GOMP_MAP_FORCE_FROM)
+ break;
+
+ gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
+ kind);
+ }
+
+ if (data_enter)
+ {
+ for (i = 0; i < mapnum; i++)
+ {
+ unsigned char kind = kinds[i] & 0xff;
+
+ /* Scan for PSETs. */
+ int psets = find_pset (i, mapnum, kinds);
+
+ if (!psets)
+ {
+ switch (kind)
+ {
+ case GOMP_MAP_POINTER:
+ gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
+ &kinds[i]);
+ break;
+ case GOMP_MAP_FORCE_ALLOC:
+ acc_create (hostaddrs[i], sizes[i]);
+ break;
+ case GOMP_MAP_FORCE_PRESENT:
+ acc_present_or_copyin (hostaddrs[i], sizes[i]);
+ break;
+ case GOMP_MAP_FORCE_TO:
+ acc_present_or_copyin (hostaddrs[i], sizes[i]);
+ break;
+ default:
+ gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
+ kind);
+ break;
+ }
+ }
+ else
+ {
+ gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
+ /* Increment 'i' by two because OpenACC requires Fortran
+ arrays to be contiguous, so each PSET is associated with
+ one of MAP_FORCE_ALLOC/MAP_FORCE_PRESENT/MAP_FORCE_TO, and
+ one MAP_POINTER. */
+ i += 2;
+ }
+ }
+ }
+ else
+ for (i = 0; i < mapnum; ++i)
+ {
+ unsigned char kind = kinds[i] & 0xff;
+
+ int psets = find_pset (i, mapnum, kinds);
+
+ if (!psets)
+ {
+ switch (kind)
+ {
+ case GOMP_MAP_POINTER:
+ /* NOTE(review): KIND is GOMP_MAP_POINTER in this case, so the
+ force_copyfrom argument below presumably always evaluates to
+ false -- confirm. */
+ gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
+ == GOMP_MAP_FORCE_FROM,
+ async, 1);
+ break;
+ case GOMP_MAP_FORCE_DEALLOC:
+ acc_delete (hostaddrs[i], sizes[i]);
+ break;
+ case GOMP_MAP_FORCE_FROM:
+ acc_copyout (hostaddrs[i], sizes[i]);
+ break;
+ default:
+ gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
+ kind);
+ break;
+ }
+ }
+ else
+ {
+ gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
+ == GOMP_MAP_FORCE_FROM, async, 3);
+ /* As in the enter-data loop, a PSET group occupies three entries,
+ so skip the two that follow. */
+ i += 2;
+ }
+ }
+
+ /* Put the device's async setting back to synchronous. */
+ acc_dev->openacc.async_set_async_func (acc_async_sync);
+}
+
+/* Wait on async queues before an operation on queue ASYNC. AP supplies
+ NUM_WAITS queue ids of type int. With acc_async_sync the host waits (on
+ everything if NUM_WAITS is zero); otherwise the waits are queued on the
+ device through the async hooks. */
+static void
+goacc_wait (int async, int num_waits, va_list ap)
+{
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
+ int i;
+
+ assert (num_waits >= 0);
+
+ if (async == acc_async_sync && num_waits == 0)
+ {
+ acc_wait_all ();
+ return;
+ }
+
+ if (async == acc_async_sync && num_waits)
+ {
+ for (i = 0; i < num_waits; i++)
+ {
+ int qid = va_arg (ap, int);
+
+ /* Skip the wait when the test on QID already reports nonzero. */
+ if (acc_async_test (qid))
+ continue;
+
+ acc_wait (qid);
+ }
+ return;
+ }
+
+ if (async == acc_async_noval && num_waits == 0)
+ {
+ acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
+ return;
+ }
+
+ for (i = 0; i < num_waits; i++)
+ {
+ int qid = va_arg (ap, int);
+
+ if (acc_async_test (qid))
+ continue;
+
+ /* If we're waiting on the same asynchronous queue as we're launching on,
+ the queue itself will order work as required, so there's no need to
+ wait explicitly. */
+ if (qid != async)
+ acc_dev->openacc.async_wait_async_func (qid, async);
+ }
+}
+
+/* Implement the OpenACC "update" directive: for each of the MAPNUM entries,
+ copy host to device (GOMP_MAP_FORCE_TO) or device to host
+ (GOMP_MAP_FORCE_FROM). Pointer and pointer-set entries are skipped. Does
+ nothing for host fallback or a shared-memory device. NUM_WAITS variadic
+ int arguments name async queues to wait on first. OFFLOAD_TABLE is not
+ used here. */
+void
+GOACC_update (int device, const void *offload_table, size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned short *kinds,
+ int async, int num_waits, ...)
+{
+ bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
+ size_t i;
+
+ select_acc_device (device);
+
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
+
+ if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ || host_fallback)
+ return;
+
+ if (num_waits > 0)
+ {
+ va_list ap;
+
+ va_start (ap, num_waits);
+
+ goacc_wait (async, num_waits, ap);
+
+ va_end (ap);
+ }
+
+ acc_dev->openacc.async_set_async_func (async);
+
+ for (i = 0; i < mapnum; ++i)
+ {
+ /* Only the low byte of a kind encodes the mapping type. */
+ unsigned char kind = kinds[i] & 0xff;
+
+ switch (kind)
+ {
+ case GOMP_MAP_POINTER:
+ case GOMP_MAP_TO_PSET:
+ break;
+
+ case GOMP_MAP_FORCE_TO:
+ acc_update_device (hostaddrs[i], sizes[i]);
+ break;
+
+ case GOMP_MAP_FORCE_FROM:
+ acc_update_self (hostaddrs[i], sizes[i]);
+ break;
+
+ default:
+ gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
+ break;
+ }
+ }
+
+ /* Put the device's async setting back to synchronous. */
+ acc_dev->openacc.async_set_async_func (acc_async_sync);
+}
+
+/* Implement the OpenACC "wait" directive: forward ASYNC and the NUM_WAITS
+ variadic int queue ids to goacc_wait. */
+void
+GOACC_wait (int async, int num_waits, ...)
+{
+ va_list ap;
+
+ va_start (ap, num_waits);
+
+ goacc_wait (async, num_waits, ap);
+
+ va_end (ap);
+}
+
+/* Always reports a single thread. */
+int
+GOACC_get_num_threads (void)
+{
+ return 1;
+}
+
+/* Always reports thread number zero. */
+int
+GOACC_get_thread_num (void)
+{
+ return 0;
+}
diff --git a/libgomp/oacc-plugin.c b/libgomp/oacc-plugin.c
new file mode 100644
index 0000000..44cd6d6
--- /dev/null
+++ b/libgomp/oacc-plugin.c
@@ -0,0 +1,48 @@
+/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Initialize and register OpenACC dispatch table from libgomp plugin. */
+
+#include "libgomp.h"
+#include "oacc-plugin.h"
+#include "oacc-int.h"
+
+/* Plugin callback: PTR is taken to be a struct target_mem_desc * and is
+   passed to gomp_unmap_vars with its second argument false.  */
+
+void
+GOMP_PLUGIN_async_unmap_vars (void *ptr)
+{
+  gomp_unmap_vars ((struct target_mem_desc *) ptr, false);
+}
+
+/* Return the target-specific part of the TLS data for the current thread. */
+
+void *
+GOMP_PLUGIN_acc_thread (void)
+{
+  struct goacc_thread *cur = goacc_thread ();
+
+  /* goacc_thread () returned no thread data: there is nothing to hand
+     back.  */
+  if (!cur)
+    return NULL;
+
+  return cur->target_tls;
+}
diff --git a/libgomp/libgomp_target.h b/libgomp/oacc-plugin.h
index 2e18a64..c60eb9c 100644
--- a/libgomp/libgomp_target.h
+++ b/libgomp/oacc-plugin.h
@@ -1,5 +1,7 @@
/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Contributed by Mentor Embedded.
+
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
@@ -22,24 +24,10 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#ifndef LIBGOMP_TARGET_H
-#define LIBGOMP_TARGET_H 1
-
-/* Type of offload target device. */
-enum offload_target_type
-{
- OFFLOAD_TARGET_TYPE_HOST,
- OFFLOAD_TARGET_TYPE_INTEL_MIC
-};
-
-/* Auxiliary struct, used for transferring a host-target address range mapping
- from plugin to libgomp. */
-struct mapping_table
-{
- uintptr_t host_start;
- uintptr_t host_end;
- uintptr_t tgt_start;
- uintptr_t tgt_end;
-};
-
-#endif /* LIBGOMP_TARGET_H */
+#ifndef OACC_PLUGIN_H
+#define OACC_PLUGIN_H 1
+
+extern void GOMP_PLUGIN_async_unmap_vars (void *);
+extern void *GOMP_PLUGIN_acc_thread (void);
+
+#endif
diff --git a/libgomp/oacc-ptx.h b/libgomp/oacc-ptx.h
new file mode 100644
index 0000000..13ff86f
--- /dev/null
+++ b/libgomp/oacc-ptx.h
@@ -0,0 +1,202 @@
+/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* PTX source defining `abort' and `_gfortran_abort' as visible functions
+   whose bodies execute `trap'.  The final line deliberately carries no
+   line-continuation backslash, so the macro ends here regardless of what
+   follows it in the file.  */
+#define ABORT_PTX \
+ ".version 3.1\n" \
+ ".target sm_30\n" \
+ ".address_size 64\n" \
+ ".visible .func abort;\n" \
+ ".visible .func abort\n" \
+ "{\n" \
+ "trap;\n" \
+ "ret;\n" \
+ "}\n" \
+ ".visible .func _gfortran_abort;\n" \
+ ".visible .func _gfortran_abort\n" \
+ "{\n" \
+ "trap;\n" \
+ "ret;\n" \
+ "}\n"
+
+/* Generated with:
+
+ $ echo 'int acc_on_device(int d) { return __builtin_acc_on_device(d); } int acc_on_device_h_(int *d) { return acc_on_device(*d); }' | accel-gcc/xgcc -Baccel-gcc -x c - -o - -S -m64 -O3 -fno-builtin-acc_on_device -fno-inline
+*/
+/* PTX source for two functions.
+   acc_on_device takes a u32 argument and returns 1 when it equals 4;
+   otherwise it returns the negated result of the `set' comparison of the
+   argument with 5 (presumably 1 on a match and 0 otherwise -- confirm
+   against the PTX `set' instruction semantics).
+   acc_on_device_h_ takes a 64-bit address, loads a u32 from it and returns
+   whatever acc_on_device returns for that value.  */
+#define ACC_ON_DEVICE_PTX \
+ " .version 3.1\n" \
+ " .target sm_30\n" \
+ " .address_size 64\n" \
+ ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1);\n" \
+ ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1)\n" \
+ "{\n" \
+ " .reg.u32 %ar1;\n" \
+ ".reg.u32 %retval;\n" \
+ " .reg.u64 %hr10;\n" \
+ " .reg.u32 %r24;\n" \
+ " .reg.u32 %r25;\n" \
+ " .reg.pred %r27;\n" \
+ " .reg.u32 %r30;\n" \
+ " ld.param.u32 %ar1, [%in_ar1];\n" \
+ " mov.u32 %r24, %ar1;\n" \
+ " setp.ne.u32 %r27,%r24,4;\n" \
+ " set.u32.eq.u32 %r30,%r24,5;\n" \
+ " neg.s32 %r25, %r30;\n" \
+ " @%r27 bra $L3;\n" \
+ " mov.u32 %r25, 1;\n" \
+ "$L3:\n" \
+ " mov.u32 %retval, %r25;\n" \
+ " st.param.u32 [%out_retval], %retval;\n" \
+ " ret;\n" \
+ " }\n" \
+ ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1);\n" \
+ ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1)\n" \
+ "{\n" \
+ " .reg.u64 %ar1;\n" \
+ ".reg.u32 %retval;\n" \
+ " .reg.u64 %hr10;\n" \
+ " .reg.u64 %r25;\n" \
+ " .reg.u32 %r26;\n" \
+ " .reg.u32 %r27;\n" \
+ " ld.param.u64 %ar1, [%in_ar1];\n" \
+ " mov.u64 %r25, %ar1;\n" \
+ " ld.u32 %r26, [%r25];\n" \
+ " {\n" \
+ " .param.u32 %retval_in;\n" \
+ " {\n" \
+ " .param.u32 %out_arg0;\n" \
+ " st.param.u32 [%out_arg0], %r26;\n" \
+ " call (%retval_in), acc_on_device, (%out_arg0);\n" \
+ " }\n" \
+ " ld.param.u32 %r27, [%retval_in];\n" \
+ "}\n" \
+ " mov.u32 %retval, %r27;\n" \
+ " st.param.u32 [%out_retval], %retval;\n" \
+ " ret;\n" \
+ " }"
+
+/* PTX source for GOACC_get_num_threads and GOACC_get_thread_num.
+   GOACC_get_num_threads returns GOACC_ntid (0) * GOACC_nctaid (0).
+   GOACC_get_thread_num returns
+   GOACC_ntid (0) * GOACC_ctaid (0) + GOACC_tid (0).
+   The GOACC_ntid, GOACC_nctaid, GOACC_ctaid and GOACC_tid functions are
+   called but not defined in this string.  */
+ #define GOACC_INTERNAL_PTX \
+ ".version 3.1\n" \
+ ".target sm_30\n" \
+ ".address_size 64\n" \
+ ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" \
+ ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" \
+ ".extern .func abort;\n" \
+ ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n" \
+ "{\n" \
+ ".reg .u32 %retval;\n" \
+ ".reg .u64 %hr10;\n" \
+ ".reg .u32 %r22;\n" \
+ ".reg .u32 %r23;\n" \
+ ".reg .u32 %r24;\n" \
+ ".reg .u32 %r25;\n" \
+ ".reg .u32 %r26;\n" \
+ ".reg .u32 %r27;\n" \
+ ".reg .u32 %r28;\n" \
+ ".reg .u32 %r29;\n" \
+ "mov.u32 %r26,0;\n" \
+ "{\n" \
+ ".param .u32 %retval_in;\n" \
+ "{\n" \
+ ".param .u32 %out_arg0;\n" \
+ "st.param.u32 [%out_arg0],%r26;\n" \
+ "call (%retval_in),GOACC_ntid,(%out_arg0);\n" \
+ "}\n" \
+ "ld.param.u32 %r27,[%retval_in];\n" \
+ "}\n" \
+ "mov.u32 %r22,%r27;\n" \
+ "mov.u32 %r28,0;\n" \
+ "{\n" \
+ ".param .u32 %retval_in;\n" \
+ "{\n" \
+ ".param .u32 %out_arg0;\n" \
+ "st.param.u32 [%out_arg0],%r28;\n" \
+ "call (%retval_in),GOACC_nctaid,(%out_arg0);\n" \
+ "}\n" \
+ "ld.param.u32 %r29,[%retval_in];\n" \
+ "}\n" \
+ "mov.u32 %r23,%r29;\n" \
+ "mul.lo.u32 %r24,%r22,%r23;\n" \
+ "mov.u32 %r25,%r24;\n" \
+ "mov.u32 %retval,%r25;\n" \
+ "st.param.u32 [%out_retval],%retval;\n" \
+ "ret;\n" \
+ "}\n" \
+ ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n" \
+ "{\n" \
+ ".reg .u32 %retval;\n" \
+ ".reg .u64 %hr10;\n" \
+ ".reg .u32 %r22;\n" \
+ ".reg .u32 %r23;\n" \
+ ".reg .u32 %r24;\n" \
+ ".reg .u32 %r25;\n" \
+ ".reg .u32 %r26;\n" \
+ ".reg .u32 %r27;\n" \
+ ".reg .u32 %r28;\n" \
+ ".reg .u32 %r29;\n" \
+ ".reg .u32 %r30;\n" \
+ ".reg .u32 %r31;\n" \
+ ".reg .u32 %r32;\n" \
+ ".reg .u32 %r33;\n" \
+ "mov.u32 %r28,0;\n" \
+ "{\n" \
+ ".param .u32 %retval_in;\n" \
+ "{\n" \
+ ".param .u32 %out_arg0;\n" \
+ "st.param.u32 [%out_arg0],%r28;\n" \
+ "call (%retval_in),GOACC_ntid,(%out_arg0);\n" \
+ "}\n" \
+ "ld.param.u32 %r29,[%retval_in];\n" \
+ "}\n" \
+ "mov.u32 %r22,%r29;\n" \
+ "mov.u32 %r30,0;\n" \
+ "{\n" \
+ ".param .u32 %retval_in;\n" \
+ "{\n" \
+ ".param .u32 %out_arg0;\n" \
+ "st.param.u32 [%out_arg0],%r30;\n" \
+ "call (%retval_in),GOACC_ctaid,(%out_arg0);\n" \
+ "}\n" \
+ "ld.param.u32 %r31,[%retval_in];\n" \
+ "}\n" \
+ "mov.u32 %r23,%r31;\n" \
+ "mul.lo.u32 %r24,%r22,%r23;\n" \
+ "mov.u32 %r32,0;\n" \
+ "{\n" \
+ ".param .u32 %retval_in;\n" \
+ "{\n" \
+ ".param .u32 %out_arg0;\n" \
+ "st.param.u32 [%out_arg0],%r32;\n" \
+ "call (%retval_in),GOACC_tid,(%out_arg0);\n" \
+ "}\n" \
+ "ld.param.u32 %r33,[%retval_in];\n" \
+ "}\n" \
+ "mov.u32 %r25,%r33;\n" \
+ "add.u32 %r26,%r24,%r25;\n" \
+ "mov.u32 %r27,%r26;\n" \
+ "mov.u32 %retval,%r27;\n" \
+ "st.param.u32 [%out_retval],%retval;\n" \
+ "ret;\n" \
+ "}\n"
diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90
new file mode 100644
index 0000000..04d8088
--- /dev/null
+++ b/libgomp/openacc.f90
@@ -0,0 +1,956 @@
+! OpenACC Runtime Library Definitions.
+
+! Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+! Contributed by Tobias Burnus <burnus@net-b.de>
+! and Mentor Embedded.
+
+! This file is part of the GNU Offloading and Multi Processing Library
+! (libgomp).
+
+! Libgomp is free software; you can redistribute it and/or modify it
+! under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 3, or (at your option)
+! any later version.
+
+! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+! FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+! more details.
+
+! Under Section 7 of GPL version 3, you are granted additional
+! permissions described in the GCC Runtime Library Exception, version
+! 3.1, as published by the Free Software Foundation.
+
+! You should have received a copy of the GNU General Public License and
+! a copy of the GCC Runtime Library Exception along with this program;
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+! <http://www.gnu.org/licenses/>.
+
+! Kind parameters and named constants shared by the OpenACC Fortran modules.
+! Only int32 is private; every other entity has public accessibility, either
+! stated on its declaration or through the module default.
+module openacc_kinds
+  use iso_fortran_env, only: int32
+  implicit none
+
+  private :: int32
+
+  ! Integer kinds used for device-type and async-handle values.
+  integer, parameter, public :: acc_device_kind = int32
+  integer, parameter, public :: acc_handle_kind = int32
+
+  ! Device types.  Keep in sync with include/gomp-constants.h.
+  integer (acc_device_kind), parameter, public :: acc_device_none = 0
+  integer (acc_device_kind), parameter, public :: acc_device_default = 1
+  integer (acc_device_kind), parameter, public :: acc_device_host = 2
+  ! Carries no explicit accessibility; the module default applies.
+  integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3
+  integer (acc_device_kind), parameter, public :: acc_device_not_host = 4
+  integer (acc_device_kind), parameter, public :: acc_device_nvidia = 5
+
+  ! Special async handles.  Keep in sync with include/gomp-constants.h.
+  integer (acc_handle_kind), parameter, public :: acc_async_noval = -1
+  integer (acc_handle_kind), parameter, public :: acc_async_sync = -2
+
+end module openacc_kinds
+
+! Explicit interfaces used by the OpenACC Fortran bindings.
+!
+! The first interface block declares the Fortran-callable "_h" procedures
+! (implemented as external procedures elsewhere in this file).  The second
+! declares the "_l" bindings to the C runtime entry points named in their
+! bind (C) clauses.
+module openacc_internal
+  use openacc_kinds
+  implicit none
+
+  ! Fortran-side specifics ("_h").
+  interface
+    function acc_get_num_devices_h (d)
+      import
+      integer acc_get_num_devices_h
+      integer (acc_device_kind) d
+    end function
+
+    subroutine acc_set_device_type_h (d)
+      import
+      integer (acc_device_kind) d
+    end subroutine
+
+    function acc_get_device_type_h ()
+      import
+      integer (acc_device_kind) acc_get_device_type_h
+    end function
+
+    subroutine acc_set_device_num_h (n, d)
+      import
+      integer n
+      integer (acc_device_kind) d
+    end subroutine
+
+    function acc_get_device_num_h (d)
+      import
+      integer acc_get_device_num_h
+      integer (acc_device_kind) d
+    end function
+
+    function acc_async_test_h (a)
+      logical acc_async_test_h
+      integer a
+    end function
+
+    function acc_async_test_all_h ()
+      logical acc_async_test_all_h
+    end function
+
+    subroutine acc_wait_h (a)
+      integer a
+    end subroutine
+
+    subroutine acc_wait_async_h (a1, a2)
+      integer a1, a2
+    end subroutine
+
+    subroutine acc_wait_all_h ()
+    end subroutine
+
+    subroutine acc_wait_all_async_h (a)
+      integer a
+    end subroutine
+
+    subroutine acc_init_h (d)
+      import
+      integer (acc_device_kind) d
+    end subroutine
+
+    subroutine acc_shutdown_h (d)
+      import
+      integer (acc_device_kind) d
+    end subroutine
+
+    function acc_on_device_h (d)
+      import
+      integer (acc_device_kind) d
+      logical acc_on_device_h
+    end function
+
+    ! Data routines: each comes as a 32-bit length, a 64-bit length and a
+    ! contiguous assumed-rank array variant.
+    subroutine acc_copyin_32_h (a, len)
+      use iso_c_binding, only: c_int32_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+    end subroutine
+
+    subroutine acc_copyin_64_h (a, len)
+      use iso_c_binding, only: c_int64_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+    end subroutine
+
+    subroutine acc_copyin_array_h (a)
+      type (*), dimension (..), contiguous :: a
+    end subroutine
+
+    subroutine acc_present_or_copyin_32_h (a, len)
+      use iso_c_binding, only: c_int32_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+    end subroutine
+
+    subroutine acc_present_or_copyin_64_h (a, len)
+      use iso_c_binding, only: c_int64_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+    end subroutine
+
+    subroutine acc_present_or_copyin_array_h (a)
+      type (*), dimension (..), contiguous :: a
+    end subroutine
+
+    subroutine acc_create_32_h (a, len)
+      use iso_c_binding, only: c_int32_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+    end subroutine
+
+    subroutine acc_create_64_h (a, len)
+      use iso_c_binding, only: c_int64_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+    end subroutine
+
+    subroutine acc_create_array_h (a)
+      type (*), dimension (..), contiguous :: a
+    end subroutine
+
+    subroutine acc_present_or_create_32_h (a, len)
+      use iso_c_binding, only: c_int32_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+    end subroutine
+
+    subroutine acc_present_or_create_64_h (a, len)
+      use iso_c_binding, only: c_int64_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+    end subroutine
+
+    subroutine acc_present_or_create_array_h (a)
+      type (*), dimension (..), contiguous :: a
+    end subroutine
+
+    subroutine acc_copyout_32_h (a, len)
+      use iso_c_binding, only: c_int32_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+    end subroutine
+
+    subroutine acc_copyout_64_h (a, len)
+      use iso_c_binding, only: c_int64_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+    end subroutine
+
+    subroutine acc_copyout_array_h (a)
+      type (*), dimension (..), contiguous :: a
+    end subroutine
+
+    subroutine acc_delete_32_h (a, len)
+      use iso_c_binding, only: c_int32_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+    end subroutine
+
+    subroutine acc_delete_64_h (a, len)
+      use iso_c_binding, only: c_int64_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+    end subroutine
+
+    subroutine acc_delete_array_h (a)
+      type (*), dimension (..), contiguous :: a
+    end subroutine
+
+    subroutine acc_update_device_32_h (a, len)
+      use iso_c_binding, only: c_int32_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+    end subroutine
+
+    subroutine acc_update_device_64_h (a, len)
+      use iso_c_binding, only: c_int64_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+    end subroutine
+
+    subroutine acc_update_device_array_h (a)
+      type (*), dimension (..), contiguous :: a
+    end subroutine
+
+    subroutine acc_update_self_32_h (a, len)
+      use iso_c_binding, only: c_int32_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+    end subroutine
+
+    subroutine acc_update_self_64_h (a, len)
+      use iso_c_binding, only: c_int64_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+    end subroutine
+
+    subroutine acc_update_self_array_h (a)
+      type (*), dimension (..), contiguous :: a
+    end subroutine
+
+    function acc_is_present_32_h (a, len)
+      use iso_c_binding, only: c_int32_t
+      logical acc_is_present_32_h
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+    end function
+
+    function acc_is_present_64_h (a, len)
+      use iso_c_binding, only: c_int64_t
+      logical acc_is_present_64_h
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+    end function
+
+    function acc_is_present_array_h (a)
+      logical acc_is_present_array_h
+      type (*), dimension (..), contiguous :: a
+    end function
+  end interface
+
+  ! C bindings ("_l").  Scalars are passed by value; int results and
+  ! arguments use c_int, lengths use c_size_t.
+  interface
+    function acc_get_num_devices_l (d) &
+        bind (C, name = "acc_get_num_devices")
+      use iso_c_binding, only: c_int
+      integer (c_int) :: acc_get_num_devices_l
+      integer (c_int), value :: d
+    end function
+
+    subroutine acc_set_device_type_l (d) &
+        bind (C, name = "acc_set_device_type")
+      use iso_c_binding, only: c_int
+      integer (c_int), value :: d
+    end subroutine
+
+    function acc_get_device_type_l () &
+        bind (C, name = "acc_get_device_type")
+      use iso_c_binding, only: c_int
+      integer (c_int) :: acc_get_device_type_l
+    end function
+
+    subroutine acc_set_device_num_l (n, d) &
+        bind (C, name = "acc_set_device_num")
+      use iso_c_binding, only: c_int
+      integer (c_int), value :: n, d
+    end subroutine
+
+    function acc_get_device_num_l (d) &
+        bind (C, name = "acc_get_device_num")
+      use iso_c_binding, only: c_int
+      integer (c_int) :: acc_get_device_num_l
+      integer (c_int), value :: d
+    end function
+
+    function acc_async_test_l (a) &
+        bind (C, name = "acc_async_test")
+      use iso_c_binding, only: c_int
+      integer (c_int) :: acc_async_test_l
+      integer (c_int), value :: a
+    end function
+
+    function acc_async_test_all_l () &
+        bind (C, name = "acc_async_test_all")
+      use iso_c_binding, only: c_int
+      integer (c_int) :: acc_async_test_all_l
+    end function
+
+    subroutine acc_wait_l (a) &
+        bind (C, name = "acc_wait")
+      use iso_c_binding, only: c_int
+      integer (c_int), value :: a
+    end subroutine
+
+    subroutine acc_wait_async_l (a1, a2) &
+        bind (C, name = "acc_wait_async")
+      use iso_c_binding, only: c_int
+      integer (c_int), value :: a1, a2
+    end subroutine
+
+    ! Takes no arguments and returns nothing, so no C kinds are needed.
+    subroutine acc_wait_all_l () &
+        bind (C, name = "acc_wait_all")
+    end subroutine
+
+    subroutine acc_wait_all_async_l (a) &
+        bind (C, name = "acc_wait_all_async")
+      use iso_c_binding, only: c_int
+      integer (c_int), value :: a
+    end subroutine
+
+    subroutine acc_init_l (d) &
+        bind (C, name = "acc_init")
+      use iso_c_binding, only: c_int
+      integer (c_int), value :: d
+    end subroutine
+
+    subroutine acc_shutdown_l (d) &
+        bind (C, name = "acc_shutdown")
+      use iso_c_binding, only: c_int
+      integer (c_int), value :: d
+    end subroutine
+
+    function acc_on_device_l (d) &
+        bind (C, name = "acc_on_device")
+      use iso_c_binding, only: c_int
+      integer (c_int) :: acc_on_device_l
+      integer (c_int), value :: d
+    end function
+
+    subroutine acc_copyin_l (a, len) &
+        bind (C, name = "acc_copyin")
+      use iso_c_binding, only: c_size_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+    end subroutine
+
+    subroutine acc_present_or_copyin_l (a, len) &
+        bind (C, name = "acc_present_or_copyin")
+      use iso_c_binding, only: c_size_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+    end subroutine
+
+    subroutine acc_create_l (a, len) &
+        bind (C, name = "acc_create")
+      use iso_c_binding, only: c_size_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+    end subroutine
+
+    subroutine acc_present_or_create_l (a, len) &
+        bind (C, name = "acc_present_or_create")
+      use iso_c_binding, only: c_size_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+    end subroutine
+
+    subroutine acc_copyout_l (a, len) &
+        bind (C, name = "acc_copyout")
+      use iso_c_binding, only: c_size_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+    end subroutine
+
+    subroutine acc_delete_l (a, len) &
+        bind (C, name = "acc_delete")
+      use iso_c_binding, only: c_size_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+    end subroutine
+
+    subroutine acc_update_device_l (a, len) &
+        bind (C, name = "acc_update_device")
+      use iso_c_binding, only: c_size_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+    end subroutine
+
+    subroutine acc_update_self_l (a, len) &
+        bind (C, name = "acc_update_self")
+      use iso_c_binding, only: c_size_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+    end subroutine
+
+    ! The result uses c_int, like the other int-returning bindings above.
+    function acc_is_present_l (a, len) &
+        bind (C, name = "acc_is_present")
+      use iso_c_binding, only: c_int, c_size_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      integer (c_int) :: acc_is_present_l
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+    end function
+  end interface
+end module openacc_internal
+
+! User-facing OpenACC module.  It uses openacc_kinds and openacc_internal
+! and attaches each generic runtime-routine name to its specific "_h"
+! procedure(s) declared in openacc_internal.
+module openacc
+  use openacc_kinds
+  use openacc_internal
+  implicit none
+
+  public :: openacc_version
+
+  public :: acc_get_num_devices
+  public :: acc_set_device_type, acc_get_device_type
+  public :: acc_set_device_num, acc_get_device_num
+  public :: acc_async_test, acc_async_test_all
+  public :: acc_wait, acc_wait_async, acc_wait_all, acc_wait_all_async
+  public :: acc_init, acc_shutdown, acc_on_device
+  public :: acc_copyin, acc_present_or_copyin, acc_pcopyin
+  public :: acc_create, acc_present_or_create, acc_pcreate
+  public :: acc_copyout, acc_delete
+  public :: acc_update_device, acc_update_self, acc_is_present
+
+  integer, parameter :: openacc_version = 201306
+
+  interface acc_get_num_devices
+    procedure acc_get_num_devices_h
+  end interface acc_get_num_devices
+
+  interface acc_set_device_type
+    procedure acc_set_device_type_h
+  end interface acc_set_device_type
+
+  interface acc_get_device_type
+    procedure acc_get_device_type_h
+  end interface acc_get_device_type
+
+  interface acc_set_device_num
+    procedure acc_set_device_num_h
+  end interface acc_set_device_num
+
+  interface acc_get_device_num
+    procedure acc_get_device_num_h
+  end interface acc_get_device_num
+
+  interface acc_async_test
+    procedure acc_async_test_h
+  end interface acc_async_test
+
+  interface acc_async_test_all
+    procedure acc_async_test_all_h
+  end interface acc_async_test_all
+
+  interface acc_wait
+    procedure acc_wait_h
+  end interface acc_wait
+
+  interface acc_wait_async
+    procedure acc_wait_async_h
+  end interface acc_wait_async
+
+  interface acc_wait_all
+    procedure acc_wait_all_h
+  end interface acc_wait_all
+
+  interface acc_wait_all_async
+    procedure acc_wait_all_async_h
+  end interface acc_wait_all_async
+
+  interface acc_init
+    procedure acc_init_h
+  end interface acc_init
+
+  interface acc_shutdown
+    procedure acc_shutdown_h
+  end interface acc_shutdown
+
+  interface acc_on_device
+    procedure acc_on_device_h
+  end interface acc_on_device
+
+  ! acc_malloc: Only available in C/C++
+  ! acc_free: Only available in C/C++
+
+  ! As a vendor extension, the following code supports both 32-bit and
+  ! 64-bit arguments for "size"; the OpenACC standard only permits
+  ! default-kind integers, which are of kind 4 (i.e. 32 bits).
+  ! Additionally, the two-argument version also takes arrays as argument,
+  ! and the one-argument version also takes scalars.  Note that the code
+  ! assumes that the arrays are contiguous.
+
+  interface acc_copyin
+    procedure acc_copyin_32_h, acc_copyin_64_h, acc_copyin_array_h
+  end interface acc_copyin
+
+  interface acc_present_or_copyin
+    procedure acc_present_or_copyin_32_h, acc_present_or_copyin_64_h
+    procedure acc_present_or_copyin_array_h
+  end interface acc_present_or_copyin
+
+  ! Short alias: same specifics as acc_present_or_copyin.
+  interface acc_pcopyin
+    procedure acc_present_or_copyin_32_h, acc_present_or_copyin_64_h
+    procedure acc_present_or_copyin_array_h
+  end interface acc_pcopyin
+
+  interface acc_create
+    procedure acc_create_32_h, acc_create_64_h, acc_create_array_h
+  end interface acc_create
+
+  interface acc_present_or_create
+    procedure acc_present_or_create_32_h, acc_present_or_create_64_h
+    procedure acc_present_or_create_array_h
+  end interface acc_present_or_create
+
+  ! Short alias: same specifics as acc_present_or_create.
+  interface acc_pcreate
+    procedure acc_present_or_create_32_h, acc_present_or_create_64_h
+    procedure acc_present_or_create_array_h
+  end interface acc_pcreate
+
+  interface acc_copyout
+    procedure acc_copyout_32_h, acc_copyout_64_h, acc_copyout_array_h
+  end interface acc_copyout
+
+  interface acc_delete
+    procedure acc_delete_32_h, acc_delete_64_h, acc_delete_array_h
+  end interface acc_delete
+
+  interface acc_update_device
+    procedure acc_update_device_32_h, acc_update_device_64_h
+    procedure acc_update_device_array_h
+  end interface acc_update_device
+
+  interface acc_update_self
+    procedure acc_update_self_32_h, acc_update_self_64_h
+    procedure acc_update_self_array_h
+  end interface acc_update_self
+
+  ! acc_map_data: Only available in C/C++
+  ! acc_unmap_data: Only available in C/C++
+  ! acc_deviceptr: Only available in C/C++
+  ! acc_hostptr: Only available in C/C++
+
+  interface acc_is_present
+    procedure acc_is_present_32_h, acc_is_present_64_h
+    procedure acc_is_present_array_h
+  end interface acc_is_present
+
+  ! acc_memcpy_to_device: Only available in C/C++
+  ! acc_memcpy_from_device: Only available in C/C++
+
+end module openacc
+
+! Specific behind the generic acc_get_num_devices: passes D to
+! acc_get_num_devices_l (the binding for C acc_get_num_devices) and
+! returns its result.
+function acc_get_num_devices_h (d)
+  use openacc_kinds, only: acc_device_kind
+  use openacc_internal, only: acc_get_num_devices_l
+  implicit none
+  integer (acc_device_kind) :: d
+  integer :: acc_get_num_devices_h
+
+  acc_get_num_devices_h = acc_get_num_devices_l (d)
+end function acc_get_num_devices_h
+
+! Specific behind the generic acc_set_device_type: passes D to
+! acc_set_device_type_l (the binding for C acc_set_device_type).
+subroutine acc_set_device_type_h (d)
+  use openacc_kinds, only: acc_device_kind
+  use openacc_internal, only: acc_set_device_type_l
+  implicit none
+  integer (acc_device_kind) :: d
+
+  call acc_set_device_type_l (d)
+end subroutine acc_set_device_type_h
+
+! Specific behind the generic acc_get_device_type: returns the value of
+! acc_get_device_type_l (the binding for C acc_get_device_type).
+function acc_get_device_type_h ()
+  use openacc_kinds, only: acc_device_kind
+  use openacc_internal, only: acc_get_device_type_l
+  implicit none
+  integer (acc_device_kind) :: acc_get_device_type_h
+
+  acc_get_device_type_h = acc_get_device_type_l ()
+end function acc_get_device_type_h
+
+! Specific behind the generic acc_set_device_num: passes N and D to
+! acc_set_device_num_l (the binding for C acc_set_device_num).
+subroutine acc_set_device_num_h (n, d)
+  use openacc_kinds, only: acc_device_kind
+  use openacc_internal, only: acc_set_device_num_l
+  implicit none
+  integer :: n
+  integer (acc_device_kind) :: d
+
+  call acc_set_device_num_l (n, d)
+end subroutine acc_set_device_num_h
+
+! Specific behind the generic acc_get_device_num: passes D to
+! acc_get_device_num_l (the binding for C acc_get_device_num) and returns
+! its result.
+function acc_get_device_num_h (d)
+  use openacc_kinds, only: acc_device_kind
+  use openacc_internal, only: acc_get_device_num_l
+  implicit none
+  integer (acc_device_kind) :: d
+  integer :: acc_get_device_num_h
+
+  acc_get_device_num_h = acc_get_device_num_l (d)
+end function acc_get_device_num_h
+
+! Specific behind the generic acc_async_test.  Calls acc_async_test_l (the
+! binding for C acc_async_test) and maps its int result to a logical: any
+! nonzero value is .TRUE., matching C truthiness, rather than only the
+! exact value 1.
+function acc_async_test_h (a)
+  use openacc_internal, only: acc_async_test_l
+  logical :: acc_async_test_h
+  integer :: a
+  acc_async_test_h = acc_async_test_l (a) /= 0
+end function acc_async_test_h
+
+! Specific behind the generic acc_async_test_all.  Calls
+! acc_async_test_all_l (the binding for C acc_async_test_all) and maps its
+! int result to a logical: any nonzero value is .TRUE., matching C
+! truthiness, rather than only the exact value 1.
+function acc_async_test_all_h ()
+  use openacc_internal, only: acc_async_test_all_l
+  logical :: acc_async_test_all_h
+  acc_async_test_all_h = acc_async_test_all_l () /= 0
+end function acc_async_test_all_h
+
+! Specific behind the generic acc_wait: passes A to acc_wait_l (the binding
+! for C acc_wait).
+subroutine acc_wait_h (a)
+  use openacc_internal, only: acc_wait_l
+  implicit none
+  integer :: a
+
+  call acc_wait_l (a)
+end subroutine acc_wait_h
+
+! Specific behind the generic acc_wait_async: passes A1 and A2 to
+! acc_wait_async_l (the binding for C acc_wait_async).
+subroutine acc_wait_async_h (a1, a2)
+  use openacc_internal, only: acc_wait_async_l
+  implicit none
+  integer :: a1, a2
+
+  call acc_wait_async_l (a1, a2)
+end subroutine acc_wait_async_h
+
+! Specific behind the generic acc_wait_all: calls acc_wait_all_l (the
+! binding for C acc_wait_all).
+subroutine acc_wait_all_h ()
+  use openacc_internal, only: acc_wait_all_l
+  implicit none
+
+  call acc_wait_all_l ()
+end subroutine acc_wait_all_h
+
+! Specific behind the generic acc_wait_all_async: passes A to
+! acc_wait_all_async_l (the binding for C acc_wait_all_async).
+subroutine acc_wait_all_async_h (a)
+  use openacc_internal, only: acc_wait_all_async_l
+  implicit none
+  integer :: a
+
+  call acc_wait_all_async_l (a)
+end subroutine acc_wait_all_async_h
+
+! Specific behind the generic acc_init: passes D to acc_init_l (the binding
+! for C acc_init).
+subroutine acc_init_h (d)
+  use openacc_kinds, only: acc_device_kind
+  use openacc_internal, only: acc_init_l
+  implicit none
+  integer (acc_device_kind) :: d
+
+  call acc_init_l (d)
+end subroutine acc_init_h
+
+! Specific behind the generic acc_shutdown: passes D to acc_shutdown_l (the
+! binding for C acc_shutdown).
+subroutine acc_shutdown_h (d)
+  use openacc_kinds, only: acc_device_kind
+  use openacc_internal, only: acc_shutdown_l
+  implicit none
+  integer (acc_device_kind) :: d
+
+  call acc_shutdown_l (d)
+end subroutine acc_shutdown_h
+
+! Specific behind the generic acc_on_device.  Calls acc_on_device_l (the
+! binding for C acc_on_device) and maps its int result to a logical: any
+! nonzero value is .TRUE., matching C truthiness, rather than only the
+! exact value 1.
+function acc_on_device_h (d)
+  use openacc_internal, only: acc_on_device_l
+  use openacc_kinds
+  integer (acc_device_kind) :: d
+  logical :: acc_on_device_h
+  acc_on_device_h = acc_on_device_l (d) /= 0
+end function acc_on_device_h
+
+! 32-bit LEN specific of the generic acc_copyin: converts LEN to c_size_t
+! and forwards to acc_copyin_l (the binding for C acc_copyin).
+subroutine acc_copyin_32_h (a, len)
+  use openacc_internal, only: acc_copyin_l
+  use iso_c_binding, only: c_size_t, c_int32_t
+  implicit none
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) :: len
+  integer (c_size_t) :: c_len
+
+  c_len = int (len, kind = c_size_t)
+  call acc_copyin_l (a, c_len)
+end subroutine acc_copyin_32_h
+
+! 64-bit LEN specific of the generic acc_copyin: converts LEN to c_size_t
+! and forwards to acc_copyin_l (the binding for C acc_copyin).
+subroutine acc_copyin_64_h (a, len)
+  use openacc_internal, only: acc_copyin_l
+  use iso_c_binding, only: c_size_t, c_int64_t
+  implicit none
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) :: len
+  integer (c_size_t) :: c_len
+
+  c_len = int (len, kind = c_size_t)
+  call acc_copyin_l (a, c_len)
+end subroutine acc_copyin_64_h
+
+! Array specific of the generic acc_copyin: forwards A together with
+! sizeof (a) to acc_copyin_l (the binding for C acc_copyin).
+subroutine acc_copyin_array_h (a)
+  use openacc_internal, only: acc_copyin_l
+  implicit none
+  type (*), contiguous, dimension (..) :: a
+
+  call acc_copyin_l (a, sizeof (a))
+end subroutine acc_copyin_array_h
+
+! 32-bit LEN specific of the generic acc_present_or_copyin: converts LEN to
+! c_size_t and forwards to acc_present_or_copyin_l (the binding for C
+! acc_present_or_copyin).
+subroutine acc_present_or_copyin_32_h (a, len)
+  use openacc_internal, only: acc_present_or_copyin_l
+  use iso_c_binding, only: c_size_t, c_int32_t
+  implicit none
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) :: len
+  integer (c_size_t) :: c_len
+
+  c_len = int (len, kind = c_size_t)
+  call acc_present_or_copyin_l (a, c_len)
+end subroutine acc_present_or_copyin_32_h
+
+! 64-bit LEN specific of the generic acc_present_or_copyin: converts LEN to
+! c_size_t and forwards to acc_present_or_copyin_l (the binding for C
+! acc_present_or_copyin).
+subroutine acc_present_or_copyin_64_h (a, len)
+  use openacc_internal, only: acc_present_or_copyin_l
+  use iso_c_binding, only: c_size_t, c_int64_t
+  implicit none
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) :: len
+  integer (c_size_t) :: c_len
+
+  c_len = int (len, kind = c_size_t)
+  call acc_present_or_copyin_l (a, c_len)
+end subroutine acc_present_or_copyin_64_h
+
+! Array specific of the generic acc_present_or_copyin: forwards A together
+! with sizeof (a) to acc_present_or_copyin_l (the binding for C
+! acc_present_or_copyin).
+subroutine acc_present_or_copyin_array_h (a)
+  use openacc_internal, only: acc_present_or_copyin_l
+  implicit none
+  type (*), contiguous, dimension (..) :: a
+
+  call acc_present_or_copyin_l (a, sizeof (a))
+end subroutine acc_present_or_copyin_array_h
+
+! 32-bit LEN specific of the generic acc_create: converts LEN to c_size_t
+! and forwards to acc_create_l (the binding for C acc_create).
+subroutine acc_create_32_h (a, len)
+  use openacc_internal, only: acc_create_l
+  use iso_c_binding, only: c_size_t, c_int32_t
+  implicit none
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) :: len
+  integer (c_size_t) :: c_len
+
+  c_len = int (len, kind = c_size_t)
+  call acc_create_l (a, c_len)
+end subroutine acc_create_32_h
+
+! 64-bit LEN specific of the generic acc_create: converts LEN to c_size_t
+! and forwards to acc_create_l (the binding for C acc_create).
+subroutine acc_create_64_h (a, len)
+  use openacc_internal, only: acc_create_l
+  use iso_c_binding, only: c_size_t, c_int64_t
+  implicit none
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) :: len
+  integer (c_size_t) :: c_len
+
+  c_len = int (len, kind = c_size_t)
+  call acc_create_l (a, c_len)
+end subroutine acc_create_64_h
+
+! Array specific of the generic acc_create: forwards A together with
+! sizeof (a) to acc_create_l (the binding for C acc_create).
+subroutine acc_create_array_h (a)
+  use openacc_internal, only: acc_create_l
+  implicit none
+  type (*), contiguous, dimension (..) :: a
+
+  call acc_create_l (a, sizeof (a))
+end subroutine acc_create_array_h
+
+! 32-bit LEN specific of the generic acc_present_or_create: converts LEN to
+! c_size_t and forwards to acc_present_or_create_l (the binding for C
+! acc_present_or_create).
+subroutine acc_present_or_create_32_h (a, len)
+  use openacc_internal, only: acc_present_or_create_l
+  use iso_c_binding, only: c_size_t, c_int32_t
+  implicit none
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) :: len
+  integer (c_size_t) :: c_len
+
+  c_len = int (len, kind = c_size_t)
+  call acc_present_or_create_l (a, c_len)
+end subroutine acc_present_or_create_32_h
+
+! 64-bit LEN specific of the generic acc_present_or_create: converts LEN to
+! c_size_t and forwards to acc_present_or_create_l (the binding for C
+! acc_present_or_create).
+subroutine acc_present_or_create_64_h (a, len)
+  use openacc_internal, only: acc_present_or_create_l
+  use iso_c_binding, only: c_size_t, c_int64_t
+  implicit none
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) :: len
+  integer (c_size_t) :: c_len
+
+  c_len = int (len, kind = c_size_t)
+  call acc_present_or_create_l (a, c_len)
+end subroutine acc_present_or_create_64_h
+
+! Array specific of the generic acc_present_or_create: forwards A together
+! with sizeof (a) to acc_present_or_create_l (the binding for C
+! acc_present_or_create).
+subroutine acc_present_or_create_array_h (a)
+  use openacc_internal, only: acc_present_or_create_l
+  implicit none
+  type (*), contiguous, dimension (..) :: a
+
+  call acc_present_or_create_l (a, sizeof (a))
+end subroutine acc_present_or_create_array_h
+
+! 32-bit LEN specific of the generic acc_copyout: converts LEN to c_size_t
+! and forwards to acc_copyout_l (the binding for C acc_copyout).
+subroutine acc_copyout_32_h (a, len)
+  use openacc_internal, only: acc_copyout_l
+  use iso_c_binding, only: c_size_t, c_int32_t
+  implicit none
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) :: len
+  integer (c_size_t) :: c_len
+
+  c_len = int (len, kind = c_size_t)
+  call acc_copyout_l (a, c_len)
+end subroutine acc_copyout_32_h
+
+subroutine acc_copyout_64_h (a, len)
+ use iso_c_binding, only: c_int64_t, c_size_t
+ use openacc_internal, only: acc_copyout_l
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+ type (*), dimension (*) :: a
+ integer (c_int64_t) len
+ call acc_copyout_l (a, int (len, kind = c_size_t))
+end subroutine
+
+subroutine acc_copyout_array_h (a)
+ use openacc_internal, only: acc_copyout_l
+ type (*), dimension (..), contiguous :: a
+ call acc_copyout_l (a, sizeof (a))
+end subroutine
+
+! Forward A and LEN to acc_delete_l, converting the 32-bit LEN to
+! c_size_t first.
+subroutine acc_delete_32_h (a, len)
+  use iso_c_binding, only: c_int32_t, c_size_t
+  use openacc_internal, only: acc_delete_l
+  implicit none
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) :: len
+  integer (c_size_t) :: nbytes
+
+  nbytes = int (len, kind = c_size_t)
+  call acc_delete_l (a, nbytes)
+end subroutine acc_delete_32_h
+
+! Forward A and LEN to acc_delete_l, converting the 64-bit LEN to
+! c_size_t first.
+subroutine acc_delete_64_h (a, len)
+  use iso_c_binding, only: c_int64_t, c_size_t
+  use openacc_internal, only: acc_delete_l
+  implicit none
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) :: len
+  integer (c_size_t) :: nbytes
+
+  nbytes = int (len, kind = c_size_t)
+  call acc_delete_l (a, nbytes)
+end subroutine acc_delete_64_h
+
+! Forward the contiguous, assumed-rank A to acc_delete_l, passing
+! sizeof (A) as the length argument.
+subroutine acc_delete_array_h (a)
+  use openacc_internal, only: acc_delete_l
+  implicit none
+  type (*), dimension (..), contiguous :: a
+
+  call acc_delete_l (a, sizeof (a))
+end subroutine acc_delete_array_h
+
+! Forward A and LEN to acc_update_device_l, converting the 32-bit LEN
+! to c_size_t first.
+subroutine acc_update_device_32_h (a, len)
+  use iso_c_binding, only: c_int32_t, c_size_t
+  use openacc_internal, only: acc_update_device_l
+  implicit none
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) :: len
+  integer (c_size_t) :: nbytes
+
+  nbytes = int (len, kind = c_size_t)
+  call acc_update_device_l (a, nbytes)
+end subroutine acc_update_device_32_h
+
+! Forward A and LEN to acc_update_device_l, converting the 64-bit LEN
+! to c_size_t first.
+subroutine acc_update_device_64_h (a, len)
+  use iso_c_binding, only: c_int64_t, c_size_t
+  use openacc_internal, only: acc_update_device_l
+  implicit none
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) :: len
+  integer (c_size_t) :: nbytes
+
+  nbytes = int (len, kind = c_size_t)
+  call acc_update_device_l (a, nbytes)
+end subroutine acc_update_device_64_h
+
+! Forward the contiguous, assumed-rank A to acc_update_device_l,
+! passing sizeof (A) as the length argument.
+subroutine acc_update_device_array_h (a)
+  use openacc_internal, only: acc_update_device_l
+  implicit none
+  type (*), dimension (..), contiguous :: a
+
+  call acc_update_device_l (a, sizeof (a))
+end subroutine acc_update_device_array_h
+
+! Forward A and LEN to acc_update_self_l, converting the 32-bit LEN to
+! c_size_t first.
+subroutine acc_update_self_32_h (a, len)
+  use iso_c_binding, only: c_int32_t, c_size_t
+  use openacc_internal, only: acc_update_self_l
+  implicit none
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) :: len
+  integer (c_size_t) :: nbytes
+
+  nbytes = int (len, kind = c_size_t)
+  call acc_update_self_l (a, nbytes)
+end subroutine acc_update_self_32_h
+
+! Forward A and LEN to acc_update_self_l, converting the 64-bit LEN to
+! c_size_t first.
+subroutine acc_update_self_64_h (a, len)
+  use iso_c_binding, only: c_int64_t, c_size_t
+  use openacc_internal, only: acc_update_self_l
+  implicit none
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) :: len
+  integer (c_size_t) :: nbytes
+
+  nbytes = int (len, kind = c_size_t)
+  call acc_update_self_l (a, nbytes)
+end subroutine acc_update_self_64_h
+
+! Forward the contiguous, assumed-rank A to acc_update_self_l, passing
+! sizeof (A) as the length argument.
+subroutine acc_update_self_array_h (a)
+  use openacc_internal, only: acc_update_self_l
+  implicit none
+  type (*), dimension (..), contiguous :: a
+
+  call acc_update_self_l (a, sizeof (a))
+end subroutine acc_update_self_array_h
+
+! Return .true. exactly when acc_is_present_l yields 1 for A and LEN
+! (the 32-bit LEN is converted to c_size_t), .false. otherwise.
+function acc_is_present_32_h (a, len)
+  use iso_c_binding, only: c_int32_t, c_size_t
+  use openacc_internal, only: acc_is_present_l
+  implicit none
+  logical :: acc_is_present_32_h
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) :: len
+
+  acc_is_present_32_h = acc_is_present_l (a, int (len, kind = c_size_t)) == 1
+end function acc_is_present_32_h
+
+! Return .true. exactly when acc_is_present_l yields 1 for A and LEN
+! (the 64-bit LEN is converted to c_size_t), .false. otherwise.
+function acc_is_present_64_h (a, len)
+  use iso_c_binding, only: c_int64_t, c_size_t
+  use openacc_internal, only: acc_is_present_l
+  implicit none
+  logical :: acc_is_present_64_h
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) :: len
+
+  acc_is_present_64_h = acc_is_present_l (a, int (len, kind = c_size_t)) == 1
+end function acc_is_present_64_h
+
+! Return .true. exactly when acc_is_present_l yields 1 for the
+! contiguous, assumed-rank A with sizeof (A) as the length argument.
+function acc_is_present_array_h (a)
+  use openacc_internal, only: acc_is_present_l
+  implicit none
+  logical :: acc_is_present_array_h
+  type (*), dimension (..), contiguous :: a
+
+  if (acc_is_present_l (a, sizeof (a)) == 1) then
+    acc_is_present_array_h = .true.
+  else
+    acc_is_present_array_h = .false.
+  end if
+end function acc_is_present_array_h
diff --git a/libgomp/openacc.h b/libgomp/openacc.h
new file mode 100644
index 0000000..33432411
--- /dev/null
+++ b/libgomp/openacc.h
@@ -0,0 +1,118 @@
+/* OpenACC Runtime Library User-facing Declarations
+
+ Copyright (C) 2013-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _OPENACC_H
+#define _OPENACC_H 1
+
+/* The OpenACC standard is silent on whether including <openacc.h> may
+   or must not pull in other header files.  We chose to include some.  */
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Exception specification appended to every declaration in this header.
+   C++11 and later: a bare "noexcept".  The spelling "noexcept ()" is
+   ill-formed, because the parenthesized form requires a constant
+   expression operand, and would make the header unusable from C++11 code.
+   Older C++: an empty dynamic exception specification.
+   C: the GNU nothrow function attribute.  */
+#if __cplusplus >= 201103
+# define __GOACC_NOTHROW noexcept
+#elif __cplusplus
+# define __GOACC_NOTHROW throw ()
+#else /* Not C++ */
+# define __GOACC_NOTHROW __attribute__ ((__nothrow__))
+#endif
+
+/* Types */
+typedef enum acc_device_t
+ {
+ /* Keep in sync with include/gomp-constants.h.  Each enumerator below
+    has an explicit value; the Fortran constants in openacc_lib.h use
+    the same numbers.  */
+ acc_device_none = 0,
+ acc_device_default = 1,
+ acc_device_host = 2,
+ acc_device_host_nonshm = 3,
+ acc_device_not_host = 4,
+ acc_device_nvidia = 5,
+ _ACC_device_hwm /* Implicitly 6, one past the last explicit value;
+		     presumably a high-water mark -- TODO confirm.  */
+ } acc_device_t;
+
+typedef enum acc_async_t
+ {
+ /* Keep in sync with include/gomp-constants.h.  Both enumerators are
+    negative.  NOTE(review): presumably these are the reserved values for
+    the int async arguments of the routines declared below -- confirm.  */
+ acc_async_noval = -1,
+ acc_async_sync = -2
+ } acc_async_t;
+
+/* Runtime library entry points.  Every declaration carries the
+   __GOACC_NOTHROW exception specification defined earlier in this
+   header.  Async arguments are declared as plain int.  */
+int acc_get_num_devices (acc_device_t) __GOACC_NOTHROW;
+void acc_set_device_type (acc_device_t) __GOACC_NOTHROW;
+acc_device_t acc_get_device_type (void) __GOACC_NOTHROW;
+void acc_set_device_num (int, acc_device_t) __GOACC_NOTHROW;
+int acc_get_device_num (acc_device_t) __GOACC_NOTHROW;
+int acc_async_test (int) __GOACC_NOTHROW;
+int acc_async_test_all (void) __GOACC_NOTHROW;
+void acc_wait (int) __GOACC_NOTHROW;
+void acc_wait_async (int, int) __GOACC_NOTHROW;
+void acc_wait_all (void) __GOACC_NOTHROW;
+void acc_wait_all_async (int) __GOACC_NOTHROW;
+void acc_init (acc_device_t) __GOACC_NOTHROW;
+void acc_shutdown (acc_device_t) __GOACC_NOTHROW;
+int acc_on_device (acc_device_t) __GOACC_NOTHROW;
+void *acc_malloc (size_t) __GOACC_NOTHROW;
+void acc_free (void *) __GOACC_NOTHROW;
+/* Some of these would be more correct with const qualifiers, but
+ the standard specifies otherwise. */
+void *acc_copyin (void *, size_t) __GOACC_NOTHROW;
+void *acc_present_or_copyin (void *, size_t) __GOACC_NOTHROW;
+void *acc_create (void *, size_t) __GOACC_NOTHROW;
+void *acc_present_or_create (void *, size_t) __GOACC_NOTHROW;
+void acc_copyout (void *, size_t) __GOACC_NOTHROW;
+void acc_delete (void *, size_t) __GOACC_NOTHROW;
+void acc_update_device (void *, size_t) __GOACC_NOTHROW;
+void acc_update_self (void *, size_t) __GOACC_NOTHROW;
+void acc_map_data (void *, void *, size_t) __GOACC_NOTHROW;
+void acc_unmap_data (void *) __GOACC_NOTHROW;
+void *acc_deviceptr (void *) __GOACC_NOTHROW;
+void *acc_hostptr (void *) __GOACC_NOTHROW;
+int acc_is_present (void *, size_t) __GOACC_NOTHROW;
+void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
+void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
+
+/* Old names.  OpenACC does not specify whether these may or must not be
+   macros, inlines or aliases for the new names; here they are macros
+   that expand to the new names.  */
+#define acc_pcreate acc_present_or_create
+#define acc_pcopyin acc_present_or_copyin
+
+/* CUDA-specific routines. */
+void *acc_get_current_cuda_device (void) __GOACC_NOTHROW;
+void *acc_get_current_cuda_context (void) __GOACC_NOTHROW;
+void *acc_get_cuda_stream (int) __GOACC_NOTHROW;
+int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _OPENACC_H */
diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h
new file mode 100644
index 0000000..28659a1
--- /dev/null
+++ b/libgomp/openacc_lib.h
@@ -0,0 +1,381 @@
+! OpenACC Runtime Library Definitions. -*- mode: fortran -*-
+
+! Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+! Contributed by Tobias Burnus <burnus@net-b.de>
+! and Mentor Embedded.
+
+! This file is part of the GNU Offloading and Multi Processing Library
+! (libgomp).
+
+! Libgomp is free software; you can redistribute it and/or modify it
+! under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 3, or (at your option)
+! any later version.
+
+! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+! FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+! more details.
+
+! Under Section 7 of GPL version 3, you are granted additional
+! permissions described in the GCC Runtime Library Exception, version
+! 3.1, as published by the Free Software Foundation.
+
+! You should have received a copy of the GNU General Public License and
+! a copy of the GCC Runtime Library Exception along with this program;
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+! <http://www.gnu.org/licenses/>.
+
+! NOTE: Due to the use of dimension (..), the code only works when compiled
+! with -std=f2008ts/gnu/legacy but not with other standard settings.
+! Alternatively, the user can use the module version, which permits
+! compilation with -std=f95.
+
+! Integer kind of the device-type constants below.
+      integer, parameter :: acc_device_kind = 4
+
+! Keep in sync with include/gomp-constants.h.
+      integer (acc_device_kind), parameter :: acc_device_none = 0
+      integer (acc_device_kind), parameter :: acc_device_default = 1
+      integer (acc_device_kind), parameter :: acc_device_host = 2
+      integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3
+      integer (acc_device_kind), parameter :: acc_device_not_host = 4
+      integer (acc_device_kind), parameter :: acc_device_nvidia = 5
+
+! Integer kind of the async constants below.
+      integer, parameter :: acc_handle_kind = 4
+
+! Keep in sync with include/gomp-constants.h.
+      integer (acc_handle_kind), parameter :: acc_async_noval = -1
+      integer (acc_handle_kind), parameter :: acc_async_sync = -2
+
+! Version constant in yyyymm form.
+      integer, parameter :: openacc_version = 201306
+
+! Generic names for the external *_h procedures that take or return a
+! device type of kind acc_device_kind.
+      interface acc_get_num_devices
+        function acc_get_num_devices_h (d)
+          import :: acc_device_kind
+          integer :: acc_get_num_devices_h
+          integer (acc_device_kind) :: d
+        end function acc_get_num_devices_h
+      end interface acc_get_num_devices
+
+      interface acc_set_device_type
+        subroutine acc_set_device_type_h (d)
+          import :: acc_device_kind
+          integer (acc_device_kind) :: d
+        end subroutine acc_set_device_type_h
+      end interface acc_set_device_type
+
+      interface acc_get_device_type
+        function acc_get_device_type_h ()
+          import :: acc_device_kind
+          integer (acc_device_kind) :: acc_get_device_type_h
+        end function acc_get_device_type_h
+      end interface acc_get_device_type
+
+      interface acc_set_device_num
+        subroutine acc_set_device_num_h (n, d)
+          import :: acc_device_kind
+          integer :: n
+          integer (acc_device_kind) :: d
+        end subroutine acc_set_device_num_h
+      end interface acc_set_device_num
+
+      interface acc_get_device_num
+        function acc_get_device_num_h (d)
+          import :: acc_device_kind
+          integer :: acc_get_device_num_h
+          integer (acc_device_kind) :: d
+        end function acc_get_device_num_h
+      end interface acc_get_device_num
+
+! Generic names for the async test and wait procedures.  The async
+! arguments are default integers; the two test functions return a
+! default logical.
+      interface acc_async_test
+        function acc_async_test_h (a)
+          logical :: acc_async_test_h
+          integer :: a
+        end function acc_async_test_h
+      end interface acc_async_test
+
+      interface acc_async_test_all
+        function acc_async_test_all_h ()
+          logical :: acc_async_test_all_h
+        end function acc_async_test_all_h
+      end interface acc_async_test_all
+
+      interface acc_wait
+        subroutine acc_wait_h (a)
+          integer :: a
+        end subroutine acc_wait_h
+      end interface acc_wait
+
+      interface acc_wait_async
+        subroutine acc_wait_async_h (a1, a2)
+          integer :: a1, a2
+        end subroutine acc_wait_async_h
+      end interface acc_wait_async
+
+      interface acc_wait_all
+        subroutine acc_wait_all_h ()
+        end subroutine acc_wait_all_h
+      end interface acc_wait_all
+
+      interface acc_wait_all_async
+        subroutine acc_wait_all_async_h (a)
+          integer :: a
+        end subroutine acc_wait_all_async_h
+      end interface acc_wait_all_async
+
+! Generic names for acc_init, acc_shutdown and acc_on_device.  Each
+! takes a device type of kind acc_device_kind; acc_on_device returns a
+! default logical.
+      interface acc_init
+        subroutine acc_init_h (devicetype)
+          import :: acc_device_kind
+          integer (acc_device_kind) :: devicetype
+        end subroutine acc_init_h
+      end interface acc_init
+
+      interface acc_shutdown
+        subroutine acc_shutdown_h (devicetype)
+          import :: acc_device_kind
+          integer (acc_device_kind) :: devicetype
+        end subroutine acc_shutdown_h
+      end interface acc_shutdown
+
+      interface acc_on_device
+        function acc_on_device_h (devicetype)
+          import :: acc_device_kind
+          logical :: acc_on_device_h
+          integer (acc_device_kind) :: devicetype
+        end function acc_on_device_h
+      end interface acc_on_device
+
+ ! acc_malloc: Only available in C/C++
+ ! acc_free: Only available in C/C++
+
+! Each generic below has three specifics: untyped data plus a 32-bit
+! length, untyped data plus a 64-bit length, and a contiguous
+! assumed-rank array with no explicit length.
+      interface acc_copyin
+        subroutine acc_copyin_32_h (a, len)
+          use iso_c_binding, only: c_int32_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) :: len
+        end subroutine acc_copyin_32_h
+
+        subroutine acc_copyin_64_h (a, len)
+          use iso_c_binding, only: c_int64_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) :: len
+        end subroutine acc_copyin_64_h
+
+        subroutine acc_copyin_array_h (a)
+          type (*), dimension (..), contiguous :: a
+        end subroutine acc_copyin_array_h
+      end interface acc_copyin
+
+      interface acc_present_or_copyin
+        subroutine acc_present_or_copyin_32_h (a, len)
+          use iso_c_binding, only: c_int32_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) :: len
+        end subroutine acc_present_or_copyin_32_h
+
+        subroutine acc_present_or_copyin_64_h (a, len)
+          use iso_c_binding, only: c_int64_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) :: len
+        end subroutine acc_present_or_copyin_64_h
+
+        subroutine acc_present_or_copyin_array_h (a)
+          type (*), dimension (..), contiguous :: a
+        end subroutine acc_present_or_copyin_array_h
+      end interface acc_present_or_copyin
+
+! NOTE(review): acc_pcopyin_*_h are declared here as external
+! procedures distinct from acc_present_or_copyin_*_h; confirm that the
+! library really defines them under these names.
+      interface acc_pcopyin
+        subroutine acc_pcopyin_32_h (a, len)
+          use iso_c_binding, only: c_int32_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) :: len
+        end subroutine acc_pcopyin_32_h
+
+        subroutine acc_pcopyin_64_h (a, len)
+          use iso_c_binding, only: c_int64_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) :: len
+        end subroutine acc_pcopyin_64_h
+
+        subroutine acc_pcopyin_array_h (a)
+          type (*), dimension (..), contiguous :: a
+        end subroutine acc_pcopyin_array_h
+      end interface acc_pcopyin
+
+! Each generic below has three specifics: untyped data plus a 32-bit
+! length, untyped data plus a 64-bit length, and a contiguous
+! assumed-rank array with no explicit length.
+      interface acc_create
+        subroutine acc_create_32_h (a, len)
+          use iso_c_binding, only: c_int32_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) :: len
+        end subroutine acc_create_32_h
+
+        subroutine acc_create_64_h (a, len)
+          use iso_c_binding, only: c_int64_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) :: len
+        end subroutine acc_create_64_h
+
+        subroutine acc_create_array_h (a)
+          type (*), dimension (..), contiguous :: a
+        end subroutine acc_create_array_h
+      end interface acc_create
+
+      interface acc_present_or_create
+        subroutine acc_present_or_create_32_h (a, len)
+          use iso_c_binding, only: c_int32_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) :: len
+        end subroutine acc_present_or_create_32_h
+
+        subroutine acc_present_or_create_64_h (a, len)
+          use iso_c_binding, only: c_int64_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) :: len
+        end subroutine acc_present_or_create_64_h
+
+        subroutine acc_present_or_create_array_h (a)
+          type (*), dimension (..), contiguous :: a
+        end subroutine acc_present_or_create_array_h
+      end interface acc_present_or_create
+
+! NOTE(review): acc_pcreate_*_h are declared here as external
+! procedures distinct from acc_present_or_create_*_h; confirm that the
+! library really defines them under these names.
+      interface acc_pcreate
+        subroutine acc_pcreate_32_h (a, len)
+          use iso_c_binding, only: c_int32_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) :: len
+        end subroutine acc_pcreate_32_h
+
+        subroutine acc_pcreate_64_h (a, len)
+          use iso_c_binding, only: c_int64_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) :: len
+        end subroutine acc_pcreate_64_h
+
+        subroutine acc_pcreate_array_h (a)
+          type (*), dimension (..), contiguous :: a
+        end subroutine acc_pcreate_array_h
+      end interface acc_pcreate
+
+! Each generic below has three specifics: untyped data plus a 32-bit
+! length, untyped data plus a 64-bit length, and a contiguous
+! assumed-rank array with no explicit length.
+      interface acc_copyout
+        subroutine acc_copyout_32_h (a, len)
+          use iso_c_binding, only: c_int32_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) :: len
+        end subroutine acc_copyout_32_h
+
+        subroutine acc_copyout_64_h (a, len)
+          use iso_c_binding, only: c_int64_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) :: len
+        end subroutine acc_copyout_64_h
+
+        subroutine acc_copyout_array_h (a)
+          type (*), dimension (..), contiguous :: a
+        end subroutine acc_copyout_array_h
+      end interface acc_copyout
+
+      interface acc_delete
+        subroutine acc_delete_32_h (a, len)
+          use iso_c_binding, only: c_int32_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) :: len
+        end subroutine acc_delete_32_h
+
+        subroutine acc_delete_64_h (a, len)
+          use iso_c_binding, only: c_int64_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) :: len
+        end subroutine acc_delete_64_h
+
+        subroutine acc_delete_array_h (a)
+          type (*), dimension (..), contiguous :: a
+        end subroutine acc_delete_array_h
+      end interface acc_delete
+
+! Each generic below has three specifics: untyped data plus a 32-bit
+! length, untyped data plus a 64-bit length, and a contiguous
+! assumed-rank array with no explicit length.
+      interface acc_update_device
+        subroutine acc_update_device_32_h (a, len)
+          use iso_c_binding, only: c_int32_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) :: len
+        end subroutine acc_update_device_32_h
+
+        subroutine acc_update_device_64_h (a, len)
+          use iso_c_binding, only: c_int64_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) :: len
+        end subroutine acc_update_device_64_h
+
+        subroutine acc_update_device_array_h (a)
+          type (*), dimension (..), contiguous :: a
+        end subroutine acc_update_device_array_h
+      end interface acc_update_device
+
+      interface acc_update_self
+        subroutine acc_update_self_32_h (a, len)
+          use iso_c_binding, only: c_int32_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) :: len
+        end subroutine acc_update_self_32_h
+
+        subroutine acc_update_self_64_h (a, len)
+          use iso_c_binding, only: c_int64_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) :: len
+        end subroutine acc_update_self_64_h
+
+        subroutine acc_update_self_array_h (a)
+          type (*), dimension (..), contiguous :: a
+        end subroutine acc_update_self_array_h
+      end interface acc_update_self
+
+ ! acc_map_data: Only available in C/C++
+ ! acc_unmap_data: Only available in C/C++
+ ! acc_deviceptr: Only available in C/C++
+ ! acc_hostptr: Only available in C/C++
+
+! Generic acc_is_present returning a default logical.  Three specifics:
+! untyped data plus a 32-bit length, untyped data plus a 64-bit length,
+! and a contiguous assumed-rank array with no explicit length.
+      interface acc_is_present
+        function acc_is_present_32_h (a, len)
+          use iso_c_binding, only: c_int32_t
+          logical :: acc_is_present_32_h
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) :: len
+        end function acc_is_present_32_h
+
+        function acc_is_present_64_h (a, len)
+          use iso_c_binding, only: c_int64_t
+          logical :: acc_is_present_64_h
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) :: len
+        end function acc_is_present_64_h
+
+        function acc_is_present_array_h (a)
+          logical :: acc_is_present_array_h
+          type (*), dimension (..), contiguous :: a
+        end function acc_is_present_array_h
+      end interface acc_is_present
+
+ ! acc_memcpy_to_device: Only available in C/C++
+ ! acc_memcpy_from_device: Only available in C/C++
diff --git a/libgomp/plugin/Makefrag.am b/libgomp/plugin/Makefrag.am
new file mode 100644
index 0000000..167485f
--- /dev/null
+++ b/libgomp/plugin/Makefrag.am
@@ -0,0 +1,49 @@
+# Plugins for offload execution, Makefile.am fragment.
+#
+# Copyright (C) 2014-2015 Free Software Foundation, Inc.
+#
+# Contributed by Mentor Embedded.
+#
+# This file is part of the GNU Offloading and Multi Processing Library
+# (libgomp).
+#
+# Libgomp is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+if PLUGIN_NVPTX
+# Nvidia PTX OpenACC plugin.
+# Built only when the PLUGIN_NVPTX conditional is true.  The
+# PLUGIN_NVPTX_CPPFLAGS, PLUGIN_NVPTX_LDFLAGS and PLUGIN_NVPTX_LIBS
+# variables supply the extra preprocessor, linker and library flags.
+# The library is linked against libgomp.la and uses the libtool
+# disable-static tag.
+libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
+toolexeclib_LTLIBRARIES += libgomp-plugin-nvptx.la
+libgomp_plugin_nvptx_la_SOURCES = plugin/plugin-nvptx.c
+libgomp_plugin_nvptx_la_CPPFLAGS = $(AM_CPPFLAGS) $(PLUGIN_NVPTX_CPPFLAGS)
+libgomp_plugin_nvptx_la_LDFLAGS = $(libgomp_plugin_nvptx_version_info) \
+ $(lt_host_flags)
+libgomp_plugin_nvptx_la_LDFLAGS += $(PLUGIN_NVPTX_LDFLAGS)
+libgomp_plugin_nvptx_la_LIBADD = libgomp.la $(PLUGIN_NVPTX_LIBS)
+libgomp_plugin_nvptx_la_LIBTOOLFLAGS = --tag=disable-static
+endif
+
+# Host non-shared-memory plugin.
+# Built unconditionally from plugin/plugin-host.c, compiled with
+# -DHOST_NONSHM_PLUGIN, linked against libgomp.la, and using the
+# libtool disable-static tag.
+libgomp_plugin_host_nonshm_version_info = -version-info $(libtool_VERSION)
+toolexeclib_LTLIBRARIES += libgomp-plugin-host_nonshm.la
+libgomp_plugin_host_nonshm_la_SOURCES = plugin/plugin-host.c
+libgomp_plugin_host_nonshm_la_CPPFLAGS = $(AM_CPPFLAGS) -DHOST_NONSHM_PLUGIN
+libgomp_plugin_host_nonshm_la_LDFLAGS = \
+ $(libgomp_plugin_host_nonshm_version_info) $(lt_host_flags)
+libgomp_plugin_host_nonshm_la_LIBADD = libgomp.la
+libgomp_plugin_host_nonshm_la_LIBTOOLFLAGS = --tag=disable-static
diff --git a/libgomp/plugin/configfrag.ac b/libgomp/plugin/configfrag.ac
new file mode 100644
index 0000000..254c688
--- /dev/null
+++ b/libgomp/plugin/configfrag.ac
@@ -0,0 +1,148 @@
+# Plugins for offload execution, configure.ac fragment. -*- mode: autoconf -*-
+#
+# Copyright (C) 2014-2015 Free Software Foundation, Inc.
+#
+# Contributed by Mentor Embedded.
+#
+# This file is part of the GNU Offloading and Multi Processing Library
+# (libgomp).
+#
+# Libgomp is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
+
+# Plugin support requires dlsym from libdl.  When it is available,
+# PLUGIN_SUPPORT is defined and host_nonshm becomes the first offload
+# target.  When it is missing, configuring with any offload targets
+# enabled is an error.
+offload_targets=
+AC_SUBST(offload_targets)
+plugin_support=yes
+AC_CHECK_LIB(dl, dlsym, , [plugin_support=no])
+if test x"$plugin_support" = xyes; then
+ AC_DEFINE(PLUGIN_SUPPORT, 1,
+ [Define if all infrastructure, needed for plugins, is supported.])
+ offload_targets=host_nonshm
+elif test "x${enable_offload_targets-no}" != xno; then
+ AC_MSG_ERROR([Can't support offloading without support for plugins])
+fi
+
+# Look for the CUDA driver package.
+# --with-cuda-driver sets both the include and the lib directory; the
+# more specific --with-cuda-driver-include and --with-cuda-driver-lib
+# are examined afterwards and therefore override it.  Non-empty
+# directories are turned into -I and -L flags.
+CUDA_DRIVER_INCLUDE=
+CUDA_DRIVER_LIB=
+AC_SUBST(CUDA_DRIVER_INCLUDE)
+AC_SUBST(CUDA_DRIVER_LIB)
+CUDA_DRIVER_CPPFLAGS=
+CUDA_DRIVER_LDFLAGS=
+AC_ARG_WITH(cuda-driver,
+ [AS_HELP_STRING([--with-cuda-driver=PATH],
+ [specify prefix directory for installed CUDA driver package.
+ Equivalent to --with-cuda-driver-include=PATH/include
+ plus --with-cuda-driver-lib=PATH/lib])])
+AC_ARG_WITH(cuda-driver-include,
+ [AS_HELP_STRING([--with-cuda-driver-include=PATH],
+ [specify directory for installed CUDA driver include files])])
+AC_ARG_WITH(cuda-driver-lib,
+ [AS_HELP_STRING([--with-cuda-driver-lib=PATH],
+ [specify directory for the installed CUDA driver library])])
+if test "x$with_cuda_driver" != x; then
+ CUDA_DRIVER_INCLUDE=$with_cuda_driver/include
+ CUDA_DRIVER_LIB=$with_cuda_driver/lib
+fi
+if test "x$with_cuda_driver_include" != x; then
+ CUDA_DRIVER_INCLUDE=$with_cuda_driver_include
+fi
+if test "x$with_cuda_driver_lib" != x; then
+ CUDA_DRIVER_LIB=$with_cuda_driver_lib
+fi
+if test "x$CUDA_DRIVER_INCLUDE" != x; then
+ CUDA_DRIVER_CPPFLAGS=-I$CUDA_DRIVER_INCLUDE
+fi
+if test "x$CUDA_DRIVER_LIB" != x; then
+ CUDA_DRIVER_LDFLAGS=-L$CUDA_DRIVER_LIB
+fi
+
+# Defaults for the nvptx plugin: disabled (0) with empty flags.  The
+# offload target loop that follows fills these in when an nvptx target
+# is requested.
+PLUGIN_NVPTX=0
+PLUGIN_NVPTX_CPPFLAGS=
+PLUGIN_NVPTX_LDFLAGS=
+PLUGIN_NVPTX_LIBS=
+AC_SUBST(PLUGIN_NVPTX)
+AC_SUBST(PLUGIN_NVPTX_CPPFLAGS)
+AC_SUBST(PLUGIN_NVPTX_LDFLAGS)
+AC_SUBST(PLUGIN_NVPTX_LIBS)
+
+# Get offload targets and path to install tree of offloading compiler.
+# enable_offload_targets is a comma-separated list whose entries are
+# either TARGET or TARGET=DIR.  For each entry, tgt_dir receives the DIR
+# part, or stays empty when there is none, and tgt the TARGET part.
+offload_additional_options=
+offload_additional_lib_paths=
+AC_SUBST(offload_additional_options)
+AC_SUBST(offload_additional_lib_paths)
+if test x"$enable_offload_targets" != x; then
+ for tgt in `echo $enable_offload_targets | sed -e 's#,# #g'`; do
+ tgt_dir=`echo $tgt | grep '=' | sed 's/.*=//'`
+ tgt=`echo $tgt | sed 's/=.*//'`
+ case $tgt in
+ *-intelmic-* | *-intelmicemul-*)
+ tgt_name=intelmic
+ ;;
+ nvptx*)
+ tgt_name=nvptx
+ # PLUGIN_NVPTX temporarily holds the target name.  The link test
+ # below replaces it with 1 on success, so a value that still matches
+ # nvptx* afterwards means the CUDA driver could not be linked.
+ PLUGIN_NVPTX=$tgt
+ PLUGIN_NVPTX_CPPFLAGS=$CUDA_DRIVER_CPPFLAGS
+ PLUGIN_NVPTX_LDFLAGS=$CUDA_DRIVER_LDFLAGS
+ PLUGIN_NVPTX_LIBS='-lcuda'
+
+ # Run the link test with the plugin flags prepended, then restore
+ # the saved CPPFLAGS, LDFLAGS and LIBS.
+ PLUGIN_NVPTX_save_CPPFLAGS=$CPPFLAGS
+ CPPFLAGS="$PLUGIN_NVPTX_CPPFLAGS $CPPFLAGS"
+ PLUGIN_NVPTX_save_LDFLAGS=$LDFLAGS
+ LDFLAGS="$PLUGIN_NVPTX_LDFLAGS $LDFLAGS"
+ PLUGIN_NVPTX_save_LIBS=$LIBS
+ LIBS="$PLUGIN_NVPTX_LIBS $LIBS"
+ AC_LINK_IFELSE(
+ [AC_LANG_PROGRAM(
+ [#include "cuda.h"],
+ [CUresult r = cuCtxPushCurrent (NULL);])],
+ [PLUGIN_NVPTX=1])
+ CPPFLAGS=$PLUGIN_NVPTX_save_CPPFLAGS
+ LDFLAGS=$PLUGIN_NVPTX_save_LDFLAGS
+ LIBS=$PLUGIN_NVPTX_save_LIBS
+ case $PLUGIN_NVPTX in
+ nvptx*)
+ PLUGIN_NVPTX=0
+ AC_MSG_ERROR([CUDA driver package required for nvptx support])
+ ;;
+ esac
+ ;;
+ *)
+ AC_MSG_ERROR([unknown offload target specified])
+ ;;
+ esac
+ # Append the short target name to the comma-separated list.
+ if test x"$offload_targets" = x; then
+ offload_targets=$tgt_name
+ else
+ offload_targets=$offload_targets,$tgt_name
+ fi
+ # With an explicit DIR, point the compiler and library search paths
+ # into that install tree; otherwise use this build's own directories.
+ if test x"$tgt_dir" != x; then
+ offload_additional_options="$offload_additional_options -B$tgt_dir/libexec/gcc/\$(target_alias)/\$(gcc_version) -B$tgt_dir/bin"
+ offload_additional_lib_paths="$offload_additional_lib_paths:$tgt_dir/lib64:$tgt_dir/lib:$tgt_dir/lib32"
+ else
+ offload_additional_options="$offload_additional_options -B\$(libexecdir)/gcc/\$(target_alias)/\$(gcc_version) -B\$(bindir)"
+ offload_additional_lib_paths="$offload_additional_lib_paths:$toolexeclibdir"
+ fi
+ done
+fi
+AC_DEFINE_UNQUOTED(OFFLOAD_TARGETS, "$offload_targets",
+ [Define to hold the list of target names suitable for offloading.])
+AM_CONDITIONAL([PLUGIN_NVPTX], [test $PLUGIN_NVPTX = 1])
+AC_DEFINE_UNQUOTED([PLUGIN_NVPTX], [$PLUGIN_NVPTX],
+ [Define to 1 if the NVIDIA plugin is built, 0 if not.])
diff --git a/libgomp/plugin/plugin-host.c b/libgomp/plugin/plugin-host.c
new file mode 100644
index 0000000..ebf7f11
--- /dev/null
+++ b/libgomp/plugin/plugin-host.c
@@ -0,0 +1,266 @@
+/* OpenACC Runtime Library: acc_device_host, acc_device_host_nonshm.
+
+ Copyright (C) 2013-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Simple implementation of support routines for a shared-memory
+ acc_device_host, and a non-shared memory acc_device_host_nonshm, with the
+ latter built as a plugin. */
+
+#include "openacc.h"
+#include "config.h"
+#ifdef HOST_NONSHM_PLUGIN
+#include "libgomp-plugin.h"
+#include "oacc-plugin.h"
+#else
+#include "libgomp.h"
+#include "oacc-int.h"
+#endif
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+/* Build-flavour helpers.  With HOST_NONSHM_PLUGIN defined, STATIC expands
+   to nothing and GOMP (X) names GOMP_PLUGIN_X; otherwise STATIC is
+   "static" and GOMP (X) names gomp_X.  SELF is a string naming the
+   flavour.  */
+#ifdef HOST_NONSHM_PLUGIN
+#define STATIC
+#define GOMP(X) GOMP_PLUGIN_##X
+#define SELF "host_nonshm plugin: "
+#else
+#define STATIC static
+#define GOMP(X) gomp_##X
+#define SELF "host: "
+#endif
+
+/* Return the name of this device flavour: "host_nonshm" for the plugin
+   build, "host" otherwise.  */
+STATIC const char *
+GOMP_OFFLOAD_get_name (void)
+{
+  const char *name;
+
+#ifdef HOST_NONSHM_PLUGIN
+  name = "host_nonshm";
+#else
+  name = "host";
+#endif
+
+  return name;
+}
+
+/* Return the capability mask: OpenACC 2.0 and native execution in both
+   flavours, plus shared memory when not built as the non-SHM plugin.  */
+STATIC unsigned int
+GOMP_OFFLOAD_get_caps (void)
+{
+#ifdef HOST_NONSHM_PLUGIN
+  return GOMP_OFFLOAD_CAP_OPENACC_200 | GOMP_OFFLOAD_CAP_NATIVE_EXEC;
+#else
+  return (GOMP_OFFLOAD_CAP_OPENACC_200
+          | GOMP_OFFLOAD_CAP_NATIVE_EXEC
+          | GOMP_OFFLOAD_CAP_SHARED_MEM);
+#endif
+}
+
+/* Return the offload target type matching the build flavour.  */
+STATIC int
+GOMP_OFFLOAD_get_type (void)
+{
+  int type;
+
+#ifdef HOST_NONSHM_PLUGIN
+  type = OFFLOAD_TARGET_TYPE_HOST_NONSHM;
+#else
+  type = OFFLOAD_TARGET_TYPE_HOST;
+#endif
+
+  return type;
+}
+
+/* Always report exactly one device.  */
+STATIC int
+GOMP_OFFLOAD_get_num_devices (void)
+{
+  return 1;
+}
+
+/* Image registration hook; both arguments are ignored.  */
+STATIC void
+GOMP_OFFLOAD_register_image (void *host_table __attribute__ ((unused)),
+                             void *target_data __attribute__ ((unused)))
+{
+  /* Nothing to do.  */
+}
+
+/* Device initialization hook; the device number is ignored.  */
+STATIC void
+GOMP_OFFLOAD_init_device (int n __attribute__ ((unused)))
+{
+  /* Nothing to do.  */
+}
+
+/* Device finalization hook; the device number is ignored.  */
+STATIC void
+GOMP_OFFLOAD_fini_device (int n __attribute__ ((unused)))
+{
+  /* Nothing to do.  */
+}
+
+/* Mapping table query; *TABLE is left untouched and 0 is returned.  */
+STATIC int
+GOMP_OFFLOAD_get_table (int n __attribute__ ((unused)),
+                        struct mapping_table **table __attribute__ ((unused)))
+{
+  return 0;
+}
+
+/* Return a handle for device N: the device number itself, converted to
+   a pointer by way of intptr_t.  */
+STATIC void *
+GOMP_OFFLOAD_openacc_open_device (int n)
+{
+  intptr_t handle = n;
+
+  return (void *) handle;
+}
+
+/* Close the device identified by HND.  There is nothing to release, so
+   HND is ignored and 0 is always returned.  HND is marked unused like the
+   parameters of the other stub entry points in this file, which keeps
+   -Wunused-parameter quiet.  */
+STATIC int
+GOMP_OFFLOAD_openacc_close_device (void *hnd __attribute__ ((unused)))
+{
+  return 0;
+}
+
+/* Return the current device number, which is always 0.  */
+STATIC int
+GOMP_OFFLOAD_openacc_get_device_num (void)
+{
+  const int current_device = 0;
+
+  return current_device;
+}
+
+/* Select device N.  GOMP_OFFLOAD_get_num_devices reports a single
+   device, so any N greater than 0 is reported through the fatal error
+   routine.  Negative values are not rejected here.  N is an int, so it is
+   printed with %d rather than %u.  */
+STATIC void
+GOMP_OFFLOAD_openacc_set_device_num (int n)
+{
+  if (n > 0)
+    GOMP (fatal) ("device number %d out of range for host execution", n);
+}
+
+/* Allocate S bytes through the GOMP malloc wrapper; the device number is
+   ignored.  */
+STATIC void *
+GOMP_OFFLOAD_alloc (int n __attribute__ ((unused)), size_t s)
+{
+  void *mem = GOMP (malloc) (s);
+
+  return mem;
+}
+
+/* Release P with free; the device number is ignored.  */
+STATIC void
+GOMP_OFFLOAD_free (int n __attribute__ ((unused)), void *p)
+{
+  free (p);
+}
+
+/* Copy S bytes from host address H to device address D.  Only the
+   non-SHM plugin build copies anything; the shared-memory build does
+   nothing.  Always returns a null pointer.  */
+STATIC void *
+GOMP_OFFLOAD_host2dev (int n __attribute__ ((unused)), void *d, const void *h,
+                       size_t s)
+{
+#ifdef HOST_NONSHM_PLUGIN
+  memcpy (d, h, s);
+#endif
+
+  return NULL;
+}
+
+/* Copy S bytes from device address D to host address H.  Only the
+   non-SHM plugin build copies anything; the shared-memory build does
+   nothing.  Always returns a null pointer.  */
+STATIC void *
+GOMP_OFFLOAD_dev2host (int n __attribute__ ((unused)), void *h, const void *d,
+                       size_t s)
+{
+#ifdef HOST_NONSHM_PLUGIN
+  memcpy (h, d, s);
+#endif
+
+  return NULL;
+}
+
+/* Call FN_PTR as a function taking a single void * argument, passing
+   VARS.  The device number is ignored.  */
+STATIC void
+GOMP_OFFLOAD_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars)
+{
+  ((void (*) (void *)) fn_ptr) (vars);
+}
+
+/* Run FN directly on the calling thread.  The non-SHM plugin build
+   passes DEVADDRS as its argument, the shared-memory build passes
+   HOSTADDRS.  All other parameters are ignored.  */
+STATIC void
+GOMP_OFFLOAD_openacc_parallel (void (*fn) (void *),
+                               size_t mapnum __attribute__ ((unused)),
+                               void **hostaddrs __attribute__ ((unused)),
+                               void **devaddrs __attribute__ ((unused)),
+                               size_t *sizes __attribute__ ((unused)),
+                               unsigned short *kinds __attribute__ ((unused)),
+                               int num_gangs __attribute__ ((unused)),
+                               int num_workers __attribute__ ((unused)),
+                               int vector_length __attribute__ ((unused)),
+                               int async __attribute__ ((unused)),
+                               void *targ_mem_desc __attribute__ ((unused)))
+{
+  void *args;
+
+#ifdef HOST_NONSHM_PLUGIN
+  args = devaddrs;
+#else
+  args = hostaddrs;
+#endif
+
+  fn (args);
+}
+
+/* Cleanup hook for asynchronous launches.  The shared-memory build
+   ignores TARG_MEM_DESC.  */
+STATIC void
+GOMP_OFFLOAD_openacc_register_async_cleanup (void *targ_mem_desc)
+{
+#ifdef HOST_NONSHM_PLUGIN
+  /* "Asynchronous" launches are executed synchronously on the (non-SHM) host,
+     so there's no point in delaying host-side cleanup -- just do it now.  */
+  GOMP_PLUGIN_async_unmap_vars (targ_mem_desc);
+#else
+  (void) targ_mem_desc;
+#endif
+}
+
+/* Async hooks.  Every one ignores its arguments: the set and wait
+   variants do nothing, and the test variants always return 1.  */
+
+STATIC void
+GOMP_OFFLOAD_openacc_async_set_async (int async __attribute__ ((unused)))
+{
+  /* Nothing to do.  */
+}
+
+STATIC int
+GOMP_OFFLOAD_openacc_async_test (int async __attribute__ ((unused)))
+{
+  return 1;
+}
+
+STATIC int
+GOMP_OFFLOAD_openacc_async_test_all (void)
+{
+  return 1;
+}
+
+STATIC void
+GOMP_OFFLOAD_openacc_async_wait (int async __attribute__ ((unused)))
+{
+  /* Nothing to do.  */
+}
+
+STATIC void
+GOMP_OFFLOAD_openacc_async_wait_all (void)
+{
+  /* Nothing to do.  */
+}
+
+STATIC void
+GOMP_OFFLOAD_openacc_async_wait_async (int async1 __attribute__ ((unused)),
+                                       int async2 __attribute__ ((unused)))
+{
+  /* Nothing to do.  */
+}
+
+STATIC void
+GOMP_OFFLOAD_openacc_async_wait_all_async (int async __attribute__ ((unused)))
+{
+  /* Nothing to do.  */
+}
+
+/* Per-thread data creation hook: TARG_DATA is ignored and a null pointer
+   is returned.  */
+STATIC void *
+GOMP_OFFLOAD_openacc_create_thread_data (void *targ_data
+                                         __attribute__ ((unused)))
+{
+  void *thread_data = NULL;
+
+  return thread_data;
+}
+
+/* Per-thread data destruction hook: TLS_DATA is ignored.  */
+STATIC void
+GOMP_OFFLOAD_openacc_destroy_thread_data (void *tls_data
+                                          __attribute__ ((unused)))
+{
+  /* Nothing to do.  */
+}
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
new file mode 100644
index 0000000..483cb75
--- /dev/null
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -0,0 +1,1791 @@
+/* Plugin for NVPTX execution.
+
+ Copyright (C) 2013-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Nvidia PTX-specific parts of OpenACC support. The cuda driver
+ library appears to hold some implicit state, but the documentation
+ is not clear as to what that state might be. Or how one might
+ propagate it from one thread to another. */
+
+#include "openacc.h"
+#include "config.h"
+#include "libgomp-plugin.h"
+#include "oacc-ptx.h"
+#include "oacc-plugin.h"
+
+#include <pthread.h>
+#include <cuda.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <dlfcn.h>
+#include <unistd.h>
+#include <assert.h>
+
+/* Number of elements in the array X.  X must be a real array (not a
+   pointer), otherwise the sizeof division yields a meaningless value.  */
+#define ARRAYSIZE(X) (sizeof (X) / sizeof ((X)[0]))
+
+/* Table mapping CUDA driver result codes (R) to short human-readable
+   messages (M).  It is searched linearly by cuda_error; codes that are not
+   listed here get a generic "unknown result code" message instead.  */
+static struct
+{
+  CUresult r;
+  char *m;
+} cuda_errlist[]=
+{
+  { CUDA_ERROR_INVALID_VALUE, "invalid value" },
+  { CUDA_ERROR_OUT_OF_MEMORY, "out of memory" },
+  { CUDA_ERROR_NOT_INITIALIZED, "not initialized" },
+  { CUDA_ERROR_DEINITIALIZED, "deinitialized" },
+  { CUDA_ERROR_PROFILER_DISABLED, "profiler disabled" },
+  { CUDA_ERROR_PROFILER_NOT_INITIALIZED, "profiler not initialized" },
+  { CUDA_ERROR_PROFILER_ALREADY_STARTED, "profiler already started" },
+  { CUDA_ERROR_PROFILER_ALREADY_STOPPED, "profiler already stopped" },
+  { CUDA_ERROR_NO_DEVICE, "no device" },
+  { CUDA_ERROR_INVALID_DEVICE, "invalid device" },
+  { CUDA_ERROR_INVALID_IMAGE, "invalid image" },
+  { CUDA_ERROR_INVALID_CONTEXT, "invalid context" },
+  { CUDA_ERROR_CONTEXT_ALREADY_CURRENT, "context already current" },
+  { CUDA_ERROR_MAP_FAILED, "map error" },
+  { CUDA_ERROR_UNMAP_FAILED, "unmap error" },
+  { CUDA_ERROR_ARRAY_IS_MAPPED, "array is mapped" },
+  { CUDA_ERROR_ALREADY_MAPPED, "already mapped" },
+  { CUDA_ERROR_NO_BINARY_FOR_GPU, "no binary for gpu" },
+  { CUDA_ERROR_ALREADY_ACQUIRED, "already acquired" },
+  { CUDA_ERROR_NOT_MAPPED, "not mapped" },
+  { CUDA_ERROR_NOT_MAPPED_AS_ARRAY, "not mapped as array" },
+  { CUDA_ERROR_NOT_MAPPED_AS_POINTER, "not mapped as pointer" },
+  { CUDA_ERROR_ECC_UNCORRECTABLE, "ecc uncorrectable" },
+  { CUDA_ERROR_UNSUPPORTED_LIMIT, "unsupported limit" },
+  { CUDA_ERROR_CONTEXT_ALREADY_IN_USE, "context already in use" },
+  { CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, "peer access unsupported" },
+  { CUDA_ERROR_INVALID_SOURCE, "invalid source" },
+  { CUDA_ERROR_FILE_NOT_FOUND, "file not found" },
+  { CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND,
+    "shared object symbol not found" },
+  { CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, "shared object init error" },
+  { CUDA_ERROR_OPERATING_SYSTEM, "operating system" },
+  { CUDA_ERROR_INVALID_HANDLE, "invalid handle" },
+  { CUDA_ERROR_NOT_FOUND, "not found" },
+  { CUDA_ERROR_NOT_READY, "not ready" },
+  { CUDA_ERROR_LAUNCH_FAILED, "launch error" },
+  { CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, "launch out of resources" },
+  { CUDA_ERROR_LAUNCH_TIMEOUT, "launch timeout" },
+  { CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING,
+    "launch incompatible texturing" },
+  { CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED, "peer access already enabled" },
+  { CUDA_ERROR_PEER_ACCESS_NOT_ENABLED, "peer access not enabled" },
+  { CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE, "primary context active" },
+  { CUDA_ERROR_CONTEXT_IS_DESTROYED, "context is destroyed" },
+  { CUDA_ERROR_ASSERT, "assert" },
+  { CUDA_ERROR_TOO_MANY_PEERS, "too many peers" },
+  { CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED,
+    "host memory already registered" },
+  { CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED, "host memory not registered" },
+  { CUDA_ERROR_NOT_PERMITTED, "not permitted" },
+  { CUDA_ERROR_NOT_SUPPORTED, "not supported" },
+  { CUDA_ERROR_UNKNOWN, "unknown" }
+};
+
+/* Scratch buffer for the message cuda_error builds for result codes that
+   are missing from cuda_errlist.  It is shared by all callers, so each such
+   call overwrites the previous fallback message.  */
+static char errmsg[128];
+
+/* Return a printable description of the CUDA result code R: the matching
+   cuda_errlist message if there is one, otherwise a generic message
+   formatted into ERRMSG.  */
+static char *
+cuda_error (CUresult r)
+{
+  size_t idx = 0;
+
+  while (idx < ARRAYSIZE (cuda_errlist))
+    {
+      if (cuda_errlist[idx].r == r)
+        return cuda_errlist[idx].m;
+      idx++;
+    }
+
+  /* Not a code we know about: report its numeric value.  */
+  sprintf (errmsg, "unknown result code: %5d", r);
+
+  return errmsg;
+}
+
+/* Descriptor of one offloaded function.  nvptx_exec receives a pointer to
+   one of these as its FN argument.  */
+struct targ_fn_descriptor
+{
+ /* Kernel handle passed to cuLaunchKernel. */
+ CUfunction fn;
+ /* Kernel name; used in nvptx_exec's debug output. */
+ const char *name;
+};
+
+/* True once nvptx_init has called cuInit and set up the event list and its
+   lock; reset to false by nvptx_fini.  */
+static bool ptx_inited = false;
+
+/* A CUDA stream together with the page of host memory (see map_init,
+   map_push and map_pop) used to pass kernel argument arrays to the
+   device.  */
+struct ptx_stream
+{
+ /* The CUDA stream; NULL for a device's null stream. */
+ CUstream stream;
+ /* Host thread the stream was associated with when it was set up. */
+ pthread_t host_thread;
+ /* Set once the stream is used from a thread other than HOST_THREAD. */
+ bool multithreaded;
+
+ /* Device pointer for H, obtained from cuMemHostGetDevicePointer. */
+ CUdeviceptr d;
+ /* Host memory obtained from cuMemAllocHost. */
+ void *h;
+ /* Start of the buffer (same as H) and one past its end. */
+ void *h_begin;
+ void *h_end;
+ /* Where map_push places the next entry. */
+ void *h_next;
+ /* The entry most recently pushed by map_push. */
+ void *h_prev;
+ /* The entry map_pop releases next. */
+ void *h_tail;
+
+ /* Next stream on the owning device's active_streams list. */
+ struct ptx_stream *next;
+};
+
+/* Thread-specific data for PTX.  Retrieved for the calling thread with
+   nvptx_thread ().  */
+
+struct nvptx_thread
+{
+ /* Stream selected by nvptx_set_async; work issued by this thread goes
+ here. */
+ struct ptx_stream *current_stream;
+ /* The device this thread is using. */
+ struct ptx_device *ptx_dev;
+};
+
+/* Header of one entry in a stream's mapping buffer (see map_push).  The
+   payload handed back to the caller starts at MAPPINGS.  */
+struct map
+{
+ /* The ASYNC value given to map_push. */
+ int async;
+ /* Bytes occupied by this entry including this header; map_push may enlarge
+ it to cover unused space at the end of the buffer. */
+ size_t size;
+ /* Start of the payload (zero-length array, a GNU extension). */
+ char mappings[0];
+};
+
+/* Give stream S its mapping buffer: one page of host memory from
+   cuMemAllocHost plus the matching device pointer.  S must not have a buffer
+   yet.  The buffer starts out empty, with all cursors at its beginning.
+   Any CUDA failure is fatal.  */
+static void
+map_init (struct ptx_stream *s)
+{
+  const int page_bytes = getpagesize ();
+  CUresult status;
+
+  assert (s);
+  assert (!s->d);
+  assert (!s->h);
+
+  status = cuMemAllocHost (&s->h, page_bytes);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuMemAllocHost error: %s", cuda_error (status));
+
+  status = cuMemHostGetDevicePointer (&s->d, s->h, 0);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuMemHostGetDevicePointer error: %s",
+                       cuda_error (status));
+
+  assert (s->h);
+
+  /* Buffer bounds.  */
+  s->h_begin = s->h;
+  s->h_end = s->h_begin + page_bytes;
+
+  /* Nothing pushed yet: every cursor sits at the start.  */
+  s->h_next = s->h_begin;
+  s->h_prev = s->h_begin;
+  s->h_tail = s->h_begin;
+
+  assert (s->h_next);
+  assert (s->h_end);
+}
+
+/* Release the host memory backing stream S's mapping buffer.  A failure of
+   cuMemFreeHost is fatal.  */
+static void
+map_fini (struct ptx_stream *s)
+{
+  CUresult status = cuMemFreeHost (s->h);
+
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuMemFreeHost error: %s", cuda_error (status));
+}
+
+/* Release the oldest entry of stream S's mapping buffer, i.e. the one at
+   S->h_tail, by advancing the tail past it.  */
+static void
+map_pop (struct ptx_stream *s)
+{
+ struct map *m;
+
+ assert (s != NULL);
+ assert (s->h_next);
+ assert (s->h_prev);
+ assert (s->h_tail);
+
+ m = s->h_tail;
+
+ /* Skip the whole entry (its size includes the header and any padding that
+ map_push added when it wrapped around). */
+ s->h_tail += m->size;
+
+ /* Wrap around at the end of the buffer, keeping any overshoot. */
+ if (s->h_tail >= s->h_end)
+ s->h_tail = s->h_begin + (int) (s->h_tail - s->h_end);
+
+ /* Tail caught up with the push cursor: no entries are left, so there is
+ no "previous" entry any more either. */
+ if (s->h_next == s->h_tail)
+ s->h_prev = s->h_next;
+
+ assert (s->h_next >= s->h_begin);
+ assert (s->h_tail >= s->h_begin);
+ assert (s->h_prev >= s->h_begin);
+
+ assert (s->h_next <= s->h_end);
+ assert (s->h_tail <= s->h_end);
+ assert (s->h_prev <= s->h_end);
+}
+
+/* Reserve SIZE payload bytes (plus a struct map header) in stream S's
+   mapping buffer and record ASYNC in the header.  On return *H is the host
+   address of the payload and *D the corresponding device address.  It is a
+   fatal error if the entry does not fit even at the start of the buffer.
+
+   NOTE(review): nothing here compares the new entry against S->h_tail, so
+   it looks as if entries that have not been popped yet could be overwritten
+   once the buffer wraps -- confirm whether callers bound the number of
+   outstanding entries.  */
+static void
+map_push (struct ptx_stream *s, int async, size_t size, void **h, void **d)
+{
+ int left;
+ int offset;
+ struct map *m;
+
+ assert (s != NULL);
+
+ /* Bytes remaining between the push cursor and the end of the buffer. */
+ left = s->h_end - s->h_next;
+ /* From here on SIZE is the full entry size, header included. */
+ size += sizeof (struct map);
+
+ assert (s->h_prev);
+ assert (s->h_next);
+
+ /* NOTE(review): SIZE is a size_t and LEFT an int, so this comparison is
+ done in unsigned arithmetic. */
+ if (size >= left)
+ {
+ /* Not enough room at the end: account the unused space to the previous
+ entry (so that map_pop skips it) and restart at the beginning. */
+ m = s->h_prev;
+ m->size += left;
+ s->h_next = s->h_begin;
+
+ if (s->h_next + size > s->h_end)
+ GOMP_PLUGIN_fatal ("unable to push map");
+ }
+
+ assert (s->h_next);
+
+ /* Write the header of the new entry. */
+ m = s->h_next;
+ m->async = async;
+ m->size = size;
+
+ /* Offset of the payload from the start of the buffer; the host and device
+ views of the buffer share it. */
+ offset = (void *)&m->mappings[0] - s->h;
+
+ *d = (void *)(s->d + offset);
+ *h = (void *)(s->h + offset);
+
+ /* The new entry becomes the "previous" one; advance the push cursor. */
+ s->h_prev = s->h_next;
+ s->h_next += size;
+
+ assert (s->h_prev);
+ assert (s->h_next);
+
+ assert (s->h_next >= s->h_begin);
+ assert (s->h_tail >= s->h_begin);
+ assert (s->h_prev >= s->h_begin);
+ assert (s->h_next <= s->h_end);
+ assert (s->h_tail <= s->h_end);
+ assert (s->h_prev <= s->h_end);
+
+ return;
+}
+
+/* State kept for one opened device; created by nvptx_open_device and
+   released by nvptx_close_device.  */
+struct ptx_device
+{
+ /* CUDA context used for this device. */
+ CUcontext ctx;
+ /* True if CTX was already current when the device was opened (so it is
+ not destroyed on close), false if the plugin created it. */
+ bool ctx_shared;
+ /* CUDA device handle from cuDeviceGet. */
+ CUdevice dev;
+ /* Stream object standing for the CUDA null stream; used for
+ acc_async_sync. */
+ struct ptx_stream *null_stream;
+ /* All non-null streams associated with this device (actually context),
+ either created implicitly or passed in from the user (via
+ acc_set_cuda_stream). */
+ struct ptx_stream *active_streams;
+ /* Streams indexed by (adjusted) async value; entries are NULL until a
+ stream is created for that value. SIZE is the array length. */
+ struct {
+ struct ptx_stream **arr;
+ int size;
+ } async_streams;
+ /* A lock for use when manipulating the above stream list and array. */
+ pthread_mutex_t stream_lock;
+ /* Device ordinal, as given to nvptx_open_device. */
+ int ord;
+ /* Value of CU_DEVICE_ATTRIBUTE_GPU_OVERLAP. */
+ bool overlap;
+ /* Value of CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY. */
+ bool map;
+ /* Value of CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS. */
+ bool concur;
+ /* Value of CU_DEVICE_ATTRIBUTE_COMPUTE_MODE. */
+ int mode;
+ /* Value of CU_DEVICE_ATTRIBUTE_INTEGRATED. */
+ bool mkern;
+
+ /* NOTE(review): presumably links devices into a list; it is not set by
+ nvptx_open_device -- confirm against its users. */
+ struct ptx_device *next;
+};
+
+/* Kinds of events kept on the ptx_events list; the kind decides what
+   event_gc does once the event has completed.  */
+enum ptx_event_type
+{
+ /* Asynchronous memory copy; nothing further to do on completion. */
+ PTX_EVT_MEM,
+ /* Kernel launch; on completion the stream's mapping entry is popped. */
+ PTX_EVT_KNL,
+ /* Nothing further to do on completion. */
+ PTX_EVT_SYNC,
+ /* On completion GOMP_PLUGIN_async_unmap_vars is called. */
+ PTX_EVT_ASYNC_CLEANUP
+};
+
+/* One outstanding CUDA event, queued by event_add and reclaimed by
+   event_gc.  */
+struct ptx_event
+{
+ /* Heap-allocated CUDA event; destroyed and freed by event_gc. */
+ CUevent *evt;
+ /* One of enum ptx_event_type. */
+ int type;
+ /* Kind-specific payload: the stream for PTX_EVT_KNL, the argument for
+ GOMP_PLUGIN_async_unmap_vars for PTX_EVT_ASYNC_CLEANUP. */
+ void *addr;
+ /* Ordinal of the device the event was recorded on. */
+ int ord;
+
+ /* Next event on the ptx_events list. */
+ struct ptx_event *next;
+};
+
+/* List of outstanding events for all devices (newest first) and the lock
+   protecting it; both are set up by nvptx_init.  */
+static pthread_mutex_t ptx_event_lock;
+static struct ptx_event *ptx_events;
+
+/* Two-level stringification: _XSTR expands its argument first, so that the
+   names below are the ones <cuda.h> really maps the API functions to.  */
+#define _XSTR(s) _STR(s)
+#define _STR(s) #s
+
+/* Symbols that verify_device_library requires libcuda.so to provide.  Keep
+   the list in sync with the CUDA driver functions this file calls.  */
+static struct _synames
+{
+  char *n;
+} cuda_symnames[] =
+{
+  { _XSTR (cuCtxCreate) },
+  { _XSTR (cuCtxDestroy) },
+  { _XSTR (cuCtxGetCurrent) },
+  { _XSTR (cuCtxPushCurrent) },
+  { _XSTR (cuCtxSynchronize) },
+  { _XSTR (cuDeviceGet) },
+  { _XSTR (cuDeviceGetAttribute) },
+  { _XSTR (cuDeviceGetCount) },
+  { _XSTR (cuEventCreate) },
+  { _XSTR (cuEventDestroy) },
+  { _XSTR (cuEventQuery) },
+  { _XSTR (cuEventRecord) },
+  { _XSTR (cuInit) },
+  { _XSTR (cuLaunchKernel) },
+  { _XSTR (cuLinkAddData) },
+  { _XSTR (cuLinkComplete) },
+  { _XSTR (cuLinkCreate) },
+  { _XSTR (cuMemAlloc) },
+  { _XSTR (cuMemAllocHost) },
+  { _XSTR (cuMemcpy) },
+  { _XSTR (cuMemcpyDtoH) },
+  { _XSTR (cuMemcpyDtoHAsync) },
+  { _XSTR (cuMemcpyHtoD) },
+  { _XSTR (cuMemcpyHtoDAsync) },
+  { _XSTR (cuMemFree) },
+  { _XSTR (cuMemFreeHost) },
+  { _XSTR (cuMemGetAddressRange) },
+  { _XSTR (cuMemHostGetDevicePointer) },
+  { _XSTR (cuMemHostRegister) },
+  { _XSTR (cuMemHostUnregister) },
+  { _XSTR (cuModuleGetFunction) },
+  { _XSTR (cuModuleLoadData) },
+  /* Called by select_stream_for_async when it creates a stream.  */
+  { _XSTR (cuStreamCreate) },
+  { _XSTR (cuStreamDestroy) },
+  { _XSTR (cuStreamQuery) },
+  { _XSTR (cuStreamSynchronize) },
+  { _XSTR (cuStreamWaitEvent) }
+};
+
+/* Check that libcuda.so can be loaded and provides every symbol listed in
+   cuda_symnames.  Returns 0 on success and -1 if the library or any of the
+   symbols is missing.  The library handle is always closed again before
+   returning.  */
+static int
+verify_device_library (void)
+{
+  size_t i;
+  int result = 0;
+  void *dh;
+
+  dh = dlopen ("libcuda.so", RTLD_LAZY);
+  if (!dh)
+    return -1;
+
+  for (i = 0; i < ARRAYSIZE (cuda_symnames); i++)
+    if (!dlsym (dh, cuda_symnames[i].n))
+      {
+        /* Do not return from here: the handle still has to be closed.  */
+        result = -1;
+        break;
+      }
+
+  dlclose (dh);
+
+  return result;
+}
+
+/* Return the PTX-specific data of the calling thread, i.e. whatever
+   GOMP_PLUGIN_acc_thread hands back, viewed as a struct nvptx_thread.  */
+static inline struct nvptx_thread *
+nvptx_thread (void)
+{
+  void *thread_data = GOMP_PLUGIN_acc_thread ();
+
+  return (struct nvptx_thread *) thread_data;
+}
+
+/* Set up the stream bookkeeping of PTX_DEV: the null stream (including its
+   mapping buffer), the empty list of active streams, the stream lock, and
+   an async stream table with CONCURRENCY (at least one) empty slots.  */
+static void
+init_streams_for_device (struct ptx_device *ptx_dev, int concurrency)
+{
+  struct ptx_stream *nulls = GOMP_PLUGIN_malloc (sizeof (struct ptx_stream));
+  int slot;
+
+  /* The null stream is associated with the calling thread but may be used
+     from any thread.  */
+  nulls->stream = NULL;
+  nulls->host_thread = pthread_self ();
+  nulls->multithreaded = true;
+  nulls->d = (CUdeviceptr) NULL;
+  nulls->h = NULL;
+  map_init (nulls);
+  ptx_dev->null_stream = nulls;
+
+  ptx_dev->active_streams = NULL;
+  pthread_mutex_init (&ptx_dev->stream_lock, NULL);
+
+  if (concurrency < 1)
+    concurrency = 1;
+
+  /* This is just a guess -- make space for as many async streams as the
+     current device is capable of concurrently executing.  This can grow
+     later as necessary.  No streams are created yet.  */
+  ptx_dev->async_streams.arr
+    = GOMP_PLUGIN_malloc (concurrency * sizeof (struct ptx_stream *));
+  ptx_dev->async_streams.size = concurrency;
+
+  slot = 0;
+  while (slot < concurrency)
+    ptx_dev->async_streams.arr[slot++] = NULL;
+}
+
+/* Undo init_streams_for_device for PTX_DEV: free the async stream table,
+   destroy and free every active stream together with its mapping buffer,
+   free the null stream, and destroy the stream lock.  No other thread may
+   be using PTX_DEV's streams while this runs.  */
+static void
+fini_streams_for_device (struct ptx_device *ptx_dev)
+{
+  free (ptx_dev->async_streams.arr);
+  /* Do not leave a dangling table behind.  */
+  ptx_dev->async_streams.arr = NULL;
+  ptx_dev->async_streams.size = 0;
+
+  while (ptx_dev->active_streams != NULL)
+    {
+      struct ptx_stream *s = ptx_dev->active_streams;
+      ptx_dev->active_streams = ptx_dev->active_streams->next;
+
+      /* Best effort: a failure to destroy the stream is ignored.  */
+      cuStreamDestroy (s->stream);
+      map_fini (s);
+      free (s);
+    }
+
+  map_fini (ptx_dev->null_stream);
+  free (ptx_dev->null_stream);
+  ptx_dev->null_stream = NULL;
+
+  /* Counterpart of the pthread_mutex_init in init_streams_for_device.  */
+  pthread_mutex_destroy (&ptx_dev->stream_lock);
+}
+
+/* Select a stream for (OpenACC-semantics) ASYNC argument for the current
+   thread THREAD (and also current device/context).  If CREATE is true, create
+   the stream if it does not exist (or use EXISTING if it is non-NULL), and
+   associate the stream with the same thread argument.  Returns stream to use
+   as result.
+
+   acc_async_sync selects the device's null stream.  Without CREATE the
+   result is NULL if no stream exists for ASYNC, and it is a fatal error to
+   select a stream that is not marked multithreaded from a thread other than
+   the one it is associated with.  Values below acc_async_sync are
+   rejected with a fatal error.  */
+
+static struct ptx_stream *
+select_stream_for_async (int async, pthread_t thread, bool create,
+                         CUstream existing)
+{
+  struct nvptx_thread *nvthd = nvptx_thread ();
+  /* Local copy of TLS variable.  */
+  struct ptx_device *ptx_dev = nvthd->ptx_dev;
+  struct ptx_stream *stream = NULL;
+  int orig_async = async;
+
+  /* The special value acc_async_noval (-1) maps (for now) to an
+     implicitly-created stream, which is then handled the same as any other
+     numbered async stream.  Other options are available, e.g. using the null
+     stream for anonymous async operations, or choosing an idle stream from an
+     active set.  But, stick with this for now.  */
+  if (async > acc_async_sync)
+    async++;
+
+  if (create)
+    pthread_mutex_lock (&ptx_dev->stream_lock);
+
+  /* NOTE: AFAICT there's no particular need for acc_async_sync to map to the
+     null stream, and in fact better performance may be obtainable if it doesn't
+     (because the null stream enforces overly-strict synchronisation with
+     respect to other streams for legacy reasons, and that's probably not
+     needed with OpenACC).  Maybe investigate later.  */
+  if (async == acc_async_sync)
+    stream = ptx_dev->null_stream;
+  else if (async >= 0 && async < ptx_dev->async_streams.size
+           && ptx_dev->async_streams.arr[async] && !(create && existing))
+    stream = ptx_dev->async_streams.arr[async];
+  else if (async >= 0 && create)
+    {
+      /* Grow the table (at least doubling it) so that ASYNC has a slot.  */
+      if (async >= ptx_dev->async_streams.size)
+        {
+          int i, newsize = ptx_dev->async_streams.size * 2;
+
+          if (async >= newsize)
+            newsize = async + 1;
+
+          ptx_dev->async_streams.arr
+            = GOMP_PLUGIN_realloc (ptx_dev->async_streams.arr,
+                                   newsize * sizeof (struct ptx_stream *));
+
+          for (i = ptx_dev->async_streams.size; i < newsize; i++)
+            ptx_dev->async_streams.arr[i] = NULL;
+
+          ptx_dev->async_streams.size = newsize;
+        }
+
+      /* Create a new stream on-demand if there isn't one already, or if we're
+         setting a particular async value to an existing (externally-provided)
+         stream.  */
+      if (!ptx_dev->async_streams.arr[async] || existing)
+        {
+          CUresult r;
+          struct ptx_stream *s
+            = GOMP_PLUGIN_malloc (sizeof (struct ptx_stream));
+
+          if (existing)
+            s->stream = existing;
+          else
+            {
+              r = cuStreamCreate (&s->stream, CU_STREAM_DEFAULT);
+              if (r != CUDA_SUCCESS)
+                GOMP_PLUGIN_fatal ("cuStreamCreate error: %s", cuda_error (r));
+            }
+
+          /* If CREATE is true, we're going to be queueing some work on this
+             stream.  Associate it with the current host thread.  */
+          s->host_thread = thread;
+          s->multithreaded = false;
+
+          s->d = (CUdeviceptr) NULL;
+          s->h = NULL;
+          map_init (s);
+
+          s->next = ptx_dev->active_streams;
+          ptx_dev->active_streams = s;
+          ptx_dev->async_streams.arr[async] = s;
+        }
+
+      stream = ptx_dev->async_streams.arr[async];
+    }
+  else if (async < 0)
+    GOMP_PLUGIN_fatal ("bad async %d", async);
+
+  if (create)
+    {
+      assert (stream != NULL);
+
+      /* If we're trying to use the same stream from different threads
+         simultaneously, set stream->multithreaded to true.  This affects the
+         behaviour of acc_async_test_all and acc_wait_all, which are supposed to
+         only wait for asynchronous launches from the same host thread they are
+         invoked on.  If multiple threads use the same async value, we make note
+         of that here and fall back to testing/waiting for all threads in those
+         functions.
+
+         pthread_t is an opaque type, so thread identities have to be
+         compared with pthread_equal rather than with "!=".  */
+      if (!pthread_equal (thread, stream->host_thread))
+        stream->multithreaded = true;
+
+      pthread_mutex_unlock (&ptx_dev->stream_lock);
+    }
+  else if (stream && !stream->multithreaded
+           && !pthread_equal (stream->host_thread, thread))
+    GOMP_PLUGIN_fatal ("async %d used on wrong thread", orig_async);
+
+  return stream;
+}
+
+static int nvptx_get_num_devices (void);
+
+/* Initialize the plugin (once): verify the CUDA driver library, call
+   cuInit, and set up the event list and its lock.  Returns the number of
+   devices, or -1 if the driver library cannot be used.  A cuInit failure is
+   fatal.  */
+static int
+nvptx_init (void)
+{
+  if (!ptx_inited)
+    {
+      CUresult status;
+
+      if (verify_device_library () < 0)
+        return -1;
+
+      status = cuInit (0);
+      if (status != CUDA_SUCCESS)
+        GOMP_PLUGIN_fatal ("cuInit error: %s", cuda_error (status));
+
+      /* No events are outstanding yet.  */
+      ptx_events = NULL;
+      pthread_mutex_init (&ptx_event_lock, NULL);
+
+      ptx_inited = true;
+    }
+
+  return nvptx_get_num_devices ();
+}
+
+/* Shut the plugin down.  This only clears the "initialized" flag, so that a
+   later nvptx_init performs the full initialization again.  */
+static void
+nvptx_fini (void)
+{
+  ptx_inited = false;
+}
+
+/* Return the value of attribute ATTRIB of device DEV.  A failure to query
+   it is fatal.  */
+static int
+nvptx_device_attribute (CUdevice dev, CUdevice_attribute attrib)
+{
+  int value;
+  CUresult status = cuDeviceGetAttribute (&value, attrib, dev);
+
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuDeviceGetAttribute error: %s", cuda_error (status));
+
+  return value;
+}
+
+/* Open device number N and return its newly allocated struct ptx_device.
+   The context that is current on entry is reused (and marked as shared) if
+   there is one; otherwise a new context is created.  The device's
+   capabilities are recorded and its stream bookkeeping is set up.  CUDA
+   failures are fatal, except that the async engine count falls back to 1
+   if it cannot be queried.  */
+static void *
+nvptx_open_device (int n)
+{
+  struct ptx_device *ptx_dev;
+  CUdevice dev;
+  CUresult status;
+  int async_engines;
+
+  status = cuDeviceGet (&dev, n);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuDeviceGet error: %s", cuda_error (status));
+
+  ptx_dev = GOMP_PLUGIN_malloc (sizeof (struct ptx_device));
+
+  ptx_dev->ord = n;
+  ptx_dev->dev = dev;
+
+  status = cuCtxGetCurrent (&ptx_dev->ctx);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuCtxGetCurrent error: %s", cuda_error (status));
+
+  if (ptx_dev->ctx)
+    /* Somebody else set up a context already; use it, but do not own it.  */
+    ptx_dev->ctx_shared = true;
+  else
+    {
+      ptx_dev->ctx_shared = false;
+
+      status = cuCtxCreate (&ptx_dev->ctx, CU_CTX_SCHED_AUTO, dev);
+      if (status != CUDA_SUCCESS)
+        GOMP_PLUGIN_fatal ("cuCtxCreate error: %s", cuda_error (status));
+    }
+
+  ptx_dev->overlap
+    = nvptx_device_attribute (dev, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP);
+  ptx_dev->map
+    = nvptx_device_attribute (dev, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY);
+  ptx_dev->concur
+    = nvptx_device_attribute (dev, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS);
+  ptx_dev->mode
+    = nvptx_device_attribute (dev, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE);
+  ptx_dev->mkern
+    = nvptx_device_attribute (dev, CU_DEVICE_ATTRIBUTE_INTEGRATED);
+
+  /* The engine count only sizes the initial stream table, so a failure here
+     is not fatal.  */
+  status = cuDeviceGetAttribute (&async_engines,
+                                 CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, dev);
+  if (status != CUDA_SUCCESS)
+    async_engines = 1;
+
+  init_streams_for_device (ptx_dev, async_engines);
+
+  return (void *) ptx_dev;
+}
+
+/* Close the device described by TARG_DATA (a struct ptx_device, or NULL for
+   "nothing to do"): tear down its streams, destroy its context unless the
+   context is shared, and free the descriptor.  Always returns 0; a
+   cuCtxDestroy failure is fatal.  */
+static int
+nvptx_close_device (void *targ_data)
+{
+  struct ptx_device *ptx_dev = targ_data;
+
+  if (ptx_dev != NULL)
+    {
+      fini_streams_for_device (ptx_dev);
+
+      /* Only destroy a context that the plugin created itself.  */
+      if (!ptx_dev->ctx_shared)
+        {
+          CUresult status = cuCtxDestroy (ptx_dev->ctx);
+
+          if (status != CUDA_SUCCESS)
+            GOMP_PLUGIN_fatal ("cuCtxDestroy error: %s", cuda_error (status));
+        }
+
+      free (ptx_dev);
+    }
+
+  return 0;
+}
+
+/* Return the number of CUDA devices.  If the CUDA driver cannot be
+   initialized (for example because the libraries are installed but no
+   usable device is present), 0 is returned.  A cuDeviceGetCount failure
+   after successful initialization is fatal.  */
+static int
+nvptx_get_num_devices (void)
+{
+  int n;
+  CUresult r;
+
+  /* This function will be called before the plugin has been initialized in
+     order to enumerate available devices, but CUDA API routines can't be used
+     until cuInit has been called.  Just call it now (but don't yet do any
+     further initialization).  */
+  if (!ptx_inited)
+    {
+      r = cuInit (0);
+      /* Merely enumerating devices must not abort the program: report
+         "no devices" if the driver is unusable.  */
+      if (r != CUDA_SUCCESS)
+        return 0;
+    }
+
+  r = cuDeviceGetCount (&n);
+  if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuDeviceGetCount error: %s", cuda_error (r));
+
+  return n;
+}
+
+
+/* Add the NUL-terminated PTX text PTX to the link in progress LINKSTATE.
+   On failure print the linker's error log ELOG and abort, naming the input
+   as WHAT in the message.  */
+static void
+link_ptx_add_data (CUlinkState linkstate, char *ptx, const char *what,
+                   const char *elog)
+{
+  CUresult r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, ptx,
+                              strlen (ptx) + 1, 0, 0, 0, 0);
+
+  if (r != CUDA_SUCCESS)
+    {
+      GOMP_PLUGIN_error ("Link error log %s\n", elog);
+      GOMP_PLUGIN_fatal ("cuLinkAddData (%s) error: %s", what, cuda_error (r));
+    }
+}
+
+/* JIT-link PTX_CODE together with the plugin's built-in PTX support code
+   (ABORT_PTX, ACC_ON_DEVICE_PTX and GOACC_INTERNAL_PTX) and load the result
+   into *MODULE.  Any failure is fatal.  */
+static void
+link_ptx (CUmodule *module, char *ptx_code)
+{
+  CUjit_option opts[7];
+  void *optvals[7];
+  float elapsed = 0.0;
+#define LOGSIZE 8192
+  char elog[LOGSIZE];
+  char ilog[LOGSIZE];
+  unsigned long logsize = LOGSIZE;
+  CUlinkState linkstate;
+  CUresult r;
+  void *linkout;
+  size_t linkoutsize __attribute__ ((unused));
+
+  GOMP_PLUGIN_debug (0, "attempting to load:\n---\n%s\n---\n", ptx_code);
+
+  /* The logs are printed below; make sure they are valid (empty) strings
+     even if the driver never writes to them.  */
+  elog[0] = '\0';
+  ilog[0] = '\0';
+
+  opts[0] = CU_JIT_WALL_TIME;
+  optvals[0] = &elapsed;
+
+  opts[1] = CU_JIT_INFO_LOG_BUFFER;
+  optvals[1] = &ilog[0];
+
+  opts[2] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
+  optvals[2] = (void *) logsize;
+
+  opts[3] = CU_JIT_ERROR_LOG_BUFFER;
+  optvals[3] = &elog[0];
+
+  opts[4] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
+  optvals[4] = (void *) logsize;
+
+  opts[5] = CU_JIT_LOG_VERBOSE;
+  optvals[5] = (void *) 1;
+
+  opts[6] = CU_JIT_TARGET;
+  optvals[6] = (void *) CU_TARGET_COMPUTE_30;
+
+  r = cuLinkCreate (7, opts, optvals, &linkstate);
+  if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuLinkCreate error: %s", cuda_error (r));
+
+  /* Support code first, then the code to be loaded.  */
+  link_ptx_add_data (linkstate, ABORT_PTX, "abort", elog);
+  link_ptx_add_data (linkstate, ACC_ON_DEVICE_PTX, "acc_on_device", elog);
+  link_ptx_add_data (linkstate, GOACC_INTERNAL_PTX, "goacc_internal_ptx",
+                     elog);
+  link_ptx_add_data (linkstate, ptx_code, "ptx_code", elog);
+
+  r = cuLinkComplete (linkstate, &linkout, &linkoutsize);
+  if (r != CUDA_SUCCESS)
+    {
+      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
+      GOMP_PLUGIN_fatal ("cuLinkComplete error: %s", cuda_error (r));
+    }
+
+  GOMP_PLUGIN_debug (0, "Link complete: %fms\n", elapsed);
+  GOMP_PLUGIN_debug (0, "Link log %s\n", &ilog[0]);
+
+  r = cuModuleLoadData (module, linkout);
+  if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuModuleLoadData error: %s", cuda_error (r));
+
+  /* LINKOUT is owned by the link state, so the state may only be released
+     now that the image has been loaded into the module.  */
+  r = cuLinkDestroy (linkstate);
+  if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuLinkDestroy error: %s", cuda_error (r));
+}
+
+/* Reclaim completed events of the current thread's device: run the
+   completion action for each finished event on the ptx_events list, destroy
+   the CUDA event, and unlink and free the list entry.  Events of other
+   devices and events that have not completed yet are left alone.
+
+   MEMMAP_LOCKABLE says whether GOMP_PLUGIN_async_unmap_vars may be called
+   from here; if it is false, completed PTX_EVT_ASYNC_CLEANUP events stay on
+   the list until a later call.  */
+static void
+event_gc (bool memmap_lockable)
+{
+  struct ptx_event *ptx_event;
+  struct nvptx_thread *nvthd = nvptx_thread ();
+
+  pthread_mutex_lock (&ptx_event_lock);
+
+  /* The list head may be changed by other threads (see event_add), so it
+     must only be read once the lock is held.  */
+  ptx_event = ptx_events;
+
+  while (ptx_event != NULL)
+    {
+      CUresult r;
+      struct ptx_event *e = ptx_event;
+
+      /* Step to the successor first: E may be freed below.  */
+      ptx_event = ptx_event->next;
+
+      if (e->ord != nvthd->ptx_dev->ord)
+        continue;
+
+      r = cuEventQuery (*e->evt);
+      if (r == CUDA_SUCCESS)
+        {
+          CUevent *te;
+
+          te = e->evt;
+
+          switch (e->type)
+            {
+            case PTX_EVT_MEM:
+            case PTX_EVT_SYNC:
+              break;
+
+            case PTX_EVT_KNL:
+              map_pop (e->addr);
+              break;
+
+            case PTX_EVT_ASYNC_CLEANUP:
+              {
+                /* The function gomp_plugin_async_unmap_vars needs to claim the
+                   memory-map splay tree lock for the current device, so we
+                   can't call it when one of our callers has already claimed
+                   the lock.  In that case, just delay the GC for this event
+                   until later.  */
+                if (!memmap_lockable)
+                  continue;
+
+                GOMP_PLUGIN_async_unmap_vars (e->addr);
+              }
+              break;
+            }
+
+          cuEventDestroy (*te);
+          free ((void *)te);
+
+          /* Unlink E from the list.  */
+          if (ptx_events == e)
+            ptx_events = ptx_events->next;
+          else
+            {
+              struct ptx_event *e_ = ptx_events;
+              while (e_->next != e)
+                e_ = e_->next;
+              e_->next = e_->next->next;
+            }
+
+          free (e);
+        }
+    }
+
+  pthread_mutex_unlock (&ptx_event_lock);
+}
+
+/* Queue the CUDA event E of kind TYPE, with kind-specific payload H, at the
+   front of the ptx_events list.  The entry is tagged with the ordinal of
+   the calling thread's device so that event_gc can pick it up later.  */
+static void
+event_add (enum ptx_event_type type, CUevent *e, void *h)
+{
+  struct nvptx_thread *nvthd = nvptx_thread ();
+  struct ptx_event *node;
+
+  assert (type == PTX_EVT_MEM || type == PTX_EVT_KNL || type == PTX_EVT_SYNC
+          || type == PTX_EVT_ASYNC_CLEANUP);
+
+  /* Fill in the new entry before taking the lock.  */
+  node = GOMP_PLUGIN_malloc (sizeof (struct ptx_event));
+  node->type = type;
+  node->evt = e;
+  node->addr = h;
+  node->ord = nvthd->ptx_dev->ord;
+
+  pthread_mutex_lock (&ptx_event_lock);
+  node->next = ptx_events;
+  ptx_events = node;
+  pthread_mutex_unlock (&ptx_event_lock);
+}
+
+/* Launch the kernel described by FN (a struct targ_fn_descriptor) on the
+   stream selected for ASYNC, which must be the calling thread's current
+   stream.  The MAPNUM device addresses in DEVADDRS are copied into the
+   stream's mapping buffer and passed to the kernel as its single argument.
+   NUM_GANGS becomes the number of blocks and VECTOR_LENGTH the number of
+   threads per block.  HOSTADDRS, SIZES, KINDS, NUM_WORKERS and
+   TARG_MEM_DESC are not used here.
+
+   For ASYNC values below acc_async_noval (or if DISABLE_ASYNC is defined)
+   the function waits for the kernel and releases the mapping buffer entry
+   itself; otherwise it records a PTX_EVT_KNL event and leaves the release
+   to event_gc.  All CUDA failures are fatal.  */
+void
+nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
+ size_t *sizes, unsigned short *kinds, int num_gangs, int num_workers,
+ int vector_length, int async, void *targ_mem_desc)
+{
+ struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn;
+ CUfunction function;
+ CUresult r;
+ int i;
+ struct ptx_stream *dev_str;
+ void *kargs[1];
+ void *hp, *dp;
+ unsigned int nthreads_in_block;
+ struct nvptx_thread *nvthd = nvptx_thread ();
+ const char *maybe_abort_msg = "(perhaps abort was called)";
+
+ function = targ_fn->fn;
+
+ /* Look the stream up without creating it; it is expected to have been
+ selected already (see nvptx_set_async). */
+ dev_str = select_stream_for_async (async, pthread_self (), false, NULL);
+ assert (dev_str == nvthd->current_stream);
+
+ /* This reserves a chunk of a pre-allocated page of memory mapped on both
+ the host and the device. HP is a host pointer to the new chunk, and DP is
+ the corresponding device pointer. */
+ map_push (dev_str, async, mapnum * sizeof (void *), &hp, &dp);
+
+ GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__);
+
+ /* Copy the array of arguments to the mapped page. */
+ for (i = 0; i < mapnum; i++)
+ ((void **) hp)[i] = devaddrs[i];
+
+ /* Copy the (device) pointers to arguments to the device (dp and hp might in
+ fact have the same value on a unified-memory system). */
+ r = cuMemcpy ((CUdeviceptr)dp, (CUdeviceptr)hp, mapnum * sizeof (void *));
+ if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuMemcpy failed: %s", cuda_error (r));
+
+ GOMP_PLUGIN_debug (0, " %s: kernel %s: launch\n", __FUNCTION__, targ_fn->name);
+
+ // Correspondence between OpenACC and CUDA concepts:
+ //
+ //   OpenACC          CUDA
+ //   num_gangs        blocks
+ //   num_workers      warps (where a warp is equivalent to 32 threads)
+ //   vector length    threads
+ //
+ // Note that num_workers does not enter the launch below.
+
+ /* The openacc vector_length clause 'determines the vector length to use for
+ vector or SIMD operations'. The question is how to map this to CUDA.
+
+ In CUDA, the warp size is the vector length of a CUDA device. However, the
+ CUDA interface abstracts away from that, and only shows us warp size
+ indirectly in maximum number of threads per block, which is a product of
+ warp size and the number of hyperthreads of a multiprocessor.
+
+ We choose to map openacc vector_length directly onto the number of threads
+ in a block, in the x dimension. This is reflected in gcc code generation
+ that uses ThreadIdx.x to access vector elements.
+
+ Attempting to use an openacc vector_length of more than the maximum number
+ of threads per block will result in a cuda error. */
+ nthreads_in_block = vector_length;
+
+ /* The kernel takes one parameter: the device address of the argument
+ array. */
+ kargs[0] = &dp;
+ r = cuLaunchKernel (function,
+ num_gangs, 1, 1,
+ nthreads_in_block, 1, 1,
+ 0, dev_str->stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuLaunchKernel error: %s", cuda_error (r));
+
+#ifndef DISABLE_ASYNC
+ if (async < acc_async_noval)
+ {
+ /* Synchronous launch: wait for the kernel right here. */
+ r = cuStreamSynchronize (dev_str->stream);
+ if (r == CUDA_ERROR_LAUNCH_FAILED)
+ GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s %s\n", cuda_error (r),
+ maybe_abort_msg);
+ else if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s", cuda_error (r));
+ }
+ else
+ {
+ /* Asynchronous launch: record an event behind the kernel so that
+ event_gc can release the mapping buffer entry once it completes. */
+ CUevent *e;
+
+ e = (CUevent *)GOMP_PLUGIN_malloc (sizeof (CUevent));
+
+ r = cuEventCreate (e, CU_EVENT_DISABLE_TIMING);
+ if (r == CUDA_ERROR_LAUNCH_FAILED)
+ GOMP_PLUGIN_fatal ("cuEventCreate error: %s %s\n", cuda_error (r),
+ maybe_abort_msg);
+ else if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuEventCreate error: %s", cuda_error (r));
+
+ event_gc (true);
+
+ r = cuEventRecord (*e, dev_str->stream);
+ if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (r));
+
+ event_add (PTX_EVT_KNL, e, (void *)dev_str);
+ }
+#else
+ r = cuCtxSynchronize ();
+ if (r == CUDA_ERROR_LAUNCH_FAILED)
+ GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s %s\n", cuda_error (r),
+ maybe_abort_msg);
+ else if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s", cuda_error (r));
+#endif
+
+ GOMP_PLUGIN_debug (0, " %s: kernel %s: finished\n", __FUNCTION__,
+ targ_fn->name);
+
+ /* In the synchronous case the kernel has finished, so its mapping buffer
+ entry can be released immediately. With DISABLE_ASYNC defined the
+ condition disappears and the entry is always released here. */
+#ifndef DISABLE_ASYNC
+ if (async < acc_async_noval)
+#endif
+ map_pop (dev_str);
+}
+
+void * openacc_get_current_cuda_context (void);
+
+/* Allocate S bytes of device memory.  Returns the device address, or a null
+   pointer if the device is out of memory; any other failure is fatal.  */
+static void *
+nvptx_alloc (size_t s)
+{
+  CUdeviceptr devptr;
+  CUresult status = cuMemAlloc (&devptr, s);
+
+  switch (status)
+    {
+    case CUDA_SUCCESS:
+      break;
+
+    case CUDA_ERROR_OUT_OF_MEMORY:
+      /* Let the caller deal with memory exhaustion.  */
+      return 0;
+
+    default:
+      GOMP_PLUGIN_fatal ("cuMemAlloc error: %s", cuda_error (status));
+    }
+
+  return (void *)devptr;
+}
+
+/* Free the device memory at P.  P must be the start of a device
+   allocation; anything else, as well as any CUDA failure, is fatal.  */
+static void
+nvptx_free (void *p)
+{
+  const CUdeviceptr devptr = (CUdeviceptr)p;
+  CUdeviceptr range_base;
+  size_t range_size;
+  CUresult status;
+
+  status = cuMemGetAddressRange (&range_base, &range_size, devptr);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuMemGetAddressRange error: %s", cuda_error (status));
+
+  /* Refuse pointers into the middle of an allocation.  */
+  if (devptr != range_base)
+    GOMP_PLUGIN_fatal ("invalid device address");
+
+  status = cuMemFree (devptr);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuMemFree error: %s", cuda_error (status));
+}
+
+/* Copy S bytes from host address H to device address D.  Nothing is done
+   for S == 0.  D must lie in a device allocation large enough to hold the
+   data, and H must be non-null and different from D; violations and CUDA
+   failures are fatal.  The copy is synchronous if the calling thread's
+   current stream is the null stream (or DISABLE_ASYNC is defined);
+   otherwise it is queued on the current stream and followed by a
+   PTX_EVT_MEM event.  Always returns a null pointer.  */
+static void *
+nvptx_host2dev (void *d, const void *h, size_t s)
+{
+  CUresult status;
+  CUdeviceptr range_base;
+  size_t range_size;
+  struct nvptx_thread *nvthd = nvptx_thread ();
+
+  if (s == 0)
+    return 0;
+
+  if (d == NULL)
+    GOMP_PLUGIN_fatal ("invalid device address");
+
+  status = cuMemGetAddressRange (&range_base, &range_size, (CUdeviceptr)d);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuMemGetAddressRange error: %s", cuda_error (status));
+
+  if (!range_base)
+    GOMP_PLUGIN_fatal ("invalid device address");
+
+  if (h == NULL)
+    GOMP_PLUGIN_fatal ("invalid host address");
+
+  if (d == h)
+    GOMP_PLUGIN_fatal ("invalid host or device address");
+
+  /* The destination must not run past the end of the allocation.  */
+  if ((void *)(d + s) > (void *)(range_base + range_size))
+    GOMP_PLUGIN_fatal ("invalid size");
+
+#ifndef DISABLE_ASYNC
+  if (nvthd->current_stream != nvthd->ptx_dev->null_stream)
+    {
+      CUstream cur = nvthd->current_stream->stream;
+      CUevent *copied = (CUevent *)GOMP_PLUGIN_malloc (sizeof (CUevent));
+
+      status = cuEventCreate (copied, CU_EVENT_DISABLE_TIMING);
+      if (status != CUDA_SUCCESS)
+        GOMP_PLUGIN_fatal ("cuEventCreate error: %s", cuda_error (status));
+
+      event_gc (false);
+
+      status = cuMemcpyHtoDAsync ((CUdeviceptr)d, h, s, cur);
+      if (status != CUDA_SUCCESS)
+        GOMP_PLUGIN_fatal ("cuMemcpyHtoDAsync error: %s",
+                           cuda_error (status));
+
+      /* Mark the point in the stream at which the copy is complete.  */
+      status = cuEventRecord (*copied, cur);
+      if (status != CUDA_SUCCESS)
+        GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (status));
+
+      event_add (PTX_EVT_MEM, copied, (void *)h);
+    }
+  else
+#endif
+    {
+      status = cuMemcpyHtoD ((CUdeviceptr)d, h, s);
+      if (status != CUDA_SUCCESS)
+        GOMP_PLUGIN_fatal ("cuMemcpyHtoD error: %s", cuda_error (status));
+    }
+
+  return 0;
+}
+
+/* Copy S bytes from device address D to host address H.  Nothing is done
+   for S == 0.  D must lie in a device allocation that contains all of the
+   data, and H must be non-null and different from D; violations and CUDA
+   failures are fatal.  The copy is synchronous if the calling thread's
+   current stream is the null stream (or DISABLE_ASYNC is defined);
+   otherwise it is queued on the current stream and followed by a
+   PTX_EVT_MEM event.  Always returns a null pointer.  */
+static void *
+nvptx_dev2host (void *h, const void *d, size_t s)
+{
+  CUresult status;
+  CUdeviceptr range_base;
+  size_t range_size;
+  struct nvptx_thread *nvthd = nvptx_thread ();
+
+  if (s == 0)
+    return 0;
+
+  if (d == NULL)
+    GOMP_PLUGIN_fatal ("invalid device address");
+
+  status = cuMemGetAddressRange (&range_base, &range_size, (CUdeviceptr)d);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuMemGetAddressRange error: %s", cuda_error (status));
+
+  if (!range_base)
+    GOMP_PLUGIN_fatal ("invalid device address");
+
+  if (h == NULL)
+    GOMP_PLUGIN_fatal ("invalid host address");
+
+  if (d == h)
+    GOMP_PLUGIN_fatal ("invalid host or device address");
+
+  /* The source must not run past the end of the allocation.  */
+  if ((void *)(d + s) > (void *)(range_base + range_size))
+    GOMP_PLUGIN_fatal ("invalid size");
+
+#ifndef DISABLE_ASYNC
+  if (nvthd->current_stream != nvthd->ptx_dev->null_stream)
+    {
+      CUstream cur = nvthd->current_stream->stream;
+      CUevent *copied = (CUevent *)GOMP_PLUGIN_malloc (sizeof (CUevent));
+
+      status = cuEventCreate (copied, CU_EVENT_DISABLE_TIMING);
+      if (status != CUDA_SUCCESS)
+        GOMP_PLUGIN_fatal ("cuEventCreate error: %s\n", cuda_error (status));
+
+      event_gc (false);
+
+      status = cuMemcpyDtoHAsync (h, (CUdeviceptr)d, s, cur);
+      if (status != CUDA_SUCCESS)
+        GOMP_PLUGIN_fatal ("cuMemcpyDtoHAsync error: %s",
+                           cuda_error (status));
+
+      /* Mark the point in the stream at which the copy is complete.  */
+      status = cuEventRecord (*copied, cur);
+      if (status != CUDA_SUCCESS)
+        GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (status));
+
+      event_add (PTX_EVT_MEM, copied, (void *)h);
+    }
+  else
+#endif
+    {
+      status = cuMemcpyDtoH (h, (CUdeviceptr)d, s);
+      if (status != CUDA_SUCCESS)
+        GOMP_PLUGIN_fatal ("cuMemcpyDtoH error: %s", cuda_error (status));
+    }
+
+  return 0;
+}
+
+/* Make the stream for ASYNC the calling thread's current stream, creating
+   the stream if it does not exist yet.  */
+static void
+nvptx_set_async (int async)
+{
+  struct nvptx_thread *nvthd = nvptx_thread ();
+  struct ptx_stream *selected
+    = select_stream_for_async (async, pthread_self (), true, NULL);
+
+  nvthd->current_stream = selected;
+}
+
+/* Return 1 if all work queued on the stream for ASYNC has completed and 0
+   if some is still pending.  An ASYNC value without a stream and any CUDA
+   failure are fatal.  */
+static int
+nvptx_async_test (int async)
+{
+  struct ptx_stream *s
+    = select_stream_for_async (async, pthread_self (), false, NULL);
+  CUresult status;
+
+  if (s == NULL)
+    GOMP_PLUGIN_fatal ("unknown async %d", async);
+
+  status = cuStreamQuery (s->stream);
+  switch (status)
+    {
+    case CUDA_SUCCESS:
+      /* The oacc-parallel.c:goacc_wait function calls this hook to determine
+         whether all work has completed on this stream, and if so omits the
+         call to the wait hook.  If that happens, event_gc might not get
+         called (which prevents variables from getting unmapped and their
+         associated device storage freed), so call it here.  */
+      event_gc (true);
+      return 1;
+
+    case CUDA_ERROR_NOT_READY:
+      return 0;
+
+    default:
+      break;
+    }
+
+  GOMP_PLUGIN_fatal ("cuStreamQuery error: %s", cuda_error (status));
+
+  return 0;
+}
+
+/* Test whether every active stream that is either multithreaded or owned by
+   the calling host thread has finished its work.  Returns 0 as soon as one
+   such stream reports CUDA_ERROR_NOT_READY; otherwise calls event_gc and
+   returns 1.
+
+   NOTE(review): any cuStreamQuery status other than CUDA_ERROR_NOT_READY
+   (including real errors) counts as "finished" here, unlike in
+   nvptx_async_test.  Confirm that this is intended.  */
+
+static int
+nvptx_async_test_all (void)
+{
+  pthread_t self = pthread_self ();
+  struct ptx_device *dev = nvptx_thread ()->ptx_dev;
+  struct ptx_stream *s;
+  int all_done = 1;
+
+  pthread_mutex_lock (&dev->stream_lock);
+
+  for (s = dev->active_streams; s != NULL; s = s->next)
+    {
+      /* Ignore streams that belong exclusively to another host thread.  */
+      if (!s->multithreaded && !pthread_equal (s->host_thread, self))
+        continue;
+
+      if (cuStreamQuery (s->stream) == CUDA_ERROR_NOT_READY)
+        {
+          all_done = 0;
+          break;
+        }
+    }
+
+  pthread_mutex_unlock (&dev->stream_lock);
+
+  if (all_done)
+    event_gc (true);
+
+  return all_done;
+}
+
+/* Block the calling host thread until cuStreamSynchronize reports that the
+   stream for ASYNC is done, then call event_gc.  An ASYNC value without a
+   stream, or a failing synchronize, is a fatal error.  */
+
+static void
+nvptx_wait (int async)
+{
+  struct ptx_stream *stream
+    = select_stream_for_async (async, pthread_self (), false, NULL);
+  CUresult status;
+
+  if (stream == NULL)
+    GOMP_PLUGIN_fatal ("unknown async %d", async);
+
+  status = cuStreamSynchronize (stream->stream);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s", cuda_error (status));
+
+  event_gc (true);
+}
+
+/* Make the stream for ASYNC2 wait, on the device side, for the work that is
+   currently queued on the stream for ASYNC1.  An event is recorded on the
+   ASYNC1 stream, registered with event_add, and the ASYNC2 stream is told
+   to wait for it.  It is a fatal error if ASYNC1 has no stream or if both
+   values select the same stream.  */
+
+static void
+nvptx_wait_async (int async1, int async2)
+{
+  pthread_t self = pthread_self ();
+  struct ptx_stream *waiter, *waited_for;
+  CUevent *marker;
+  CUresult status;
+
+  /* The waiting stream is looked up first, and it need not exist yet; the
+     stream being waited for must already exist.  */
+  waiter = select_stream_for_async (async2, self, true, NULL);
+
+  waited_for = select_stream_for_async (async1, self, false, NULL);
+  if (waited_for == NULL)
+    GOMP_PLUGIN_fatal ("invalid async 1\n");
+
+  if (waited_for == waiter)
+    GOMP_PLUGIN_fatal ("identical parameters");
+
+  marker = (CUevent *) GOMP_PLUGIN_malloc (sizeof (CUevent));
+
+  status = cuEventCreate (marker, CU_EVENT_DISABLE_TIMING);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuEventCreate error: %s", cuda_error (status));
+
+  event_gc (true);
+
+  status = cuEventRecord (*marker, waited_for->stream);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (status));
+
+  event_add (PTX_EVT_SYNC, marker, NULL);
+
+  status = cuStreamWaitEvent (waiter->stream, *marker, 0);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuStreamWaitEvent error: %s", cuda_error (status));
+}
+
+/* Block the calling host thread until every active stream that is either
+   multithreaded or owned by this thread has completed, then call event_gc.
+   Streams that cuStreamQuery already reports as finished are not
+   synchronized.  Any unexpected CUDA status is a fatal error.  */
+
+static void
+nvptx_wait_all (void)
+{
+  pthread_t self = pthread_self ();
+  struct nvptx_thread *nvthd = nvptx_thread ();
+  struct ptx_stream *s;
+  CUresult status;
+
+  pthread_mutex_lock (&nvthd->ptx_dev->stream_lock);
+
+  for (s = nvthd->ptx_dev->active_streams; s != NULL; s = s->next)
+    {
+      /* Streams private to another host thread are not our business.  */
+      if (!s->multithreaded && !pthread_equal (s->host_thread, self))
+        continue;
+
+      status = cuStreamQuery (s->stream);
+      if (status != CUDA_SUCCESS)
+        {
+          if (status != CUDA_ERROR_NOT_READY)
+            GOMP_PLUGIN_fatal ("cuStreamQuery error: %s",
+                               cuda_error (status));
+
+          /* Work is still pending on this stream: wait for it.  */
+          status = cuStreamSynchronize (s->stream);
+          if (status != CUDA_SUCCESS)
+            GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s",
+                               cuda_error (status));
+        }
+    }
+
+  pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock);
+
+  event_gc (true);
+}
+
+/* Make the stream for ASYNC wait, on the device side, for the work currently
+   queued on every active stream that is either multithreaded or owned by the
+   calling host thread.  For each such stream an event is recorded,
+   registered with event_add, and waited for by the ASYNC stream.  Nothing is
+   done when ASYNC selects no stream or the device's null stream.  */
+
+static void
+nvptx_wait_all_async (int async)
+{
+  struct nvptx_thread *nvthd = nvptx_thread ();
+  pthread_t self = pthread_self ();
+  struct ptx_stream *waiter, *other;
+  struct ptx_device *dev;
+  CUevent *marker;
+  CUresult status;
+
+  /* This may be the first mention of the waiting stream, so the lookup is
+     allowed to create it.  */
+  waiter = select_stream_for_async (async, self, true, NULL);
+
+  /* Launches on the null stream already block on other streams in the
+     context.  */
+  if (waiter == NULL || waiter == nvthd->ptx_dev->null_stream)
+    return;
+
+  event_gc (true);
+
+  dev = nvthd->ptx_dev;
+  pthread_mutex_lock (&dev->stream_lock);
+
+  for (other = dev->active_streams; other != NULL; other = other->next)
+    {
+      if (other->multithreaded || pthread_equal (other->host_thread, self))
+        {
+          marker = (CUevent *) GOMP_PLUGIN_malloc (sizeof (CUevent));
+
+          status = cuEventCreate (marker, CU_EVENT_DISABLE_TIMING);
+          if (status != CUDA_SUCCESS)
+            GOMP_PLUGIN_fatal ("cuEventCreate error: %s",
+                               cuda_error (status));
+
+          /* Mark the current end of the waited-for stream's queue.  */
+          status = cuEventRecord (*marker, other->stream);
+          if (status != CUDA_SUCCESS)
+            GOMP_PLUGIN_fatal ("cuEventRecord error: %s",
+                               cuda_error (status));
+
+          event_add (PTX_EVT_SYNC, marker, NULL);
+
+          status = cuStreamWaitEvent (waiter->stream, *marker, 0);
+          if (status != CUDA_SUCCESS)
+            GOMP_PLUGIN_fatal ("cuStreamWaitEvent error: %s",
+                               cuda_error (status));
+        }
+    }
+
+  pthread_mutex_unlock (&dev->stream_lock);
+}
+
+/* Return the address of the `dev' field of the calling thread's device
+   descriptor, or NULL if the thread has no plugin data or no device.  */
+
+static void *
+nvptx_get_current_cuda_device (void)
+{
+  struct nvptx_thread *nvthd = nvptx_thread ();
+
+  return (nvthd && nvthd->ptx_dev) ? &nvthd->ptx_dev->dev : NULL;
+}
+
+/* Return the `ctx' field of the calling thread's device descriptor, or NULL
+   if the thread has no plugin data or no device.  */
+
+static void *
+nvptx_get_current_cuda_context (void)
+{
+  struct nvptx_thread *nvthd = nvptx_thread ();
+
+  return (nvthd && nvthd->ptx_dev) ? nvthd->ptx_dev->ctx : NULL;
+}
+
+/* Return the `stream' field of the stream selected for ASYNC (looked up
+   without creating it).  Returns NULL if the calling thread has no plugin
+   data or device, or if no stream exists for ASYNC.  */
+
+static void *
+nvptx_get_cuda_stream (int async)
+{
+  struct nvptx_thread *nvthd = nvptx_thread ();
+  struct ptx_stream *found;
+
+  if (nvthd == NULL || nvthd->ptx_dev == NULL)
+    return NULL;
+
+  found = select_stream_for_async (async, pthread_self (), false, NULL);
+  if (found == NULL)
+    return NULL;
+
+  return found->stream;
+}
+
+/* Associate the caller-supplied STREAM (a CUstream passed as void *) with
+   the async value ASYNC for the calling host thread.  If a stream already
+   exists for ASYNC it is unlinked from the device's active list, destroyed
+   and freed first.  A negative ASYNC is a fatal error.  Always returns 1.  */
+
+static int
+nvptx_set_cuda_stream (int async, void *stream)
+{
+  struct ptx_stream *oldstream;
+  pthread_t self = pthread_self ();
+  struct nvptx_thread *nvthd = nvptx_thread ();
+
+  /* Reject bad arguments before taking the stream lock, so that the fatal
+     error path never runs with the lock held.  */
+  if (async < 0)
+    GOMP_PLUGIN_fatal ("bad async %d", async);
+
+  pthread_mutex_lock (&nvthd->ptx_dev->stream_lock);
+
+  /* We have a list of active streams and an array mapping async values to
+     entries of that list.  We need to take "ownership" of the passed-in
+     stream, and add it to our list, removing the previous entry also (if
+     there was one) in order to prevent resource leaks.  Note the potential
+     for surprise here: maybe we should keep track of passed-in streams and
+     leave it up to the user to tidy those up, but that doesn't work for
+     stream handles returned from acc_get_cuda_stream above...  */
+
+  oldstream = select_stream_for_async (async, self, false, NULL);
+
+  if (oldstream)
+    {
+      /* Unlink OLDSTREAM from the singly linked list of active streams.  */
+      if (nvthd->ptx_dev->active_streams == oldstream)
+        nvthd->ptx_dev->active_streams = nvthd->ptx_dev->active_streams->next;
+      else
+        {
+          struct ptx_stream *s = nvthd->ptx_dev->active_streams;
+          while (s->next != oldstream)
+            s = s->next;
+          s->next = s->next->next;
+        }
+
+      cuStreamDestroy (oldstream->stream);
+      map_fini (oldstream);
+      free (oldstream);
+    }
+
+  pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock);
+
+  /* Register STREAM for ASYNC; the result of the lookup is not needed.  */
+  (void) select_stream_for_async (async, self, true, (CUstream) stream);
+
+  return 1;
+}
+
+/* Plugin entry points. */
+
+/* Return the name of this plugin, the constant string "nvptx".  */
+const char *
+GOMP_OFFLOAD_get_name (void)
+{
+ return "nvptx";
+}
+
+/* Return the capability flags of this plugin; only
+   GOMP_OFFLOAD_CAP_OPENACC_200 is reported.  */
+unsigned int
+GOMP_OFFLOAD_get_caps (void)
+{
+ return GOMP_OFFLOAD_CAP_OPENACC_200;
+}
+
+/* Return the offload target type handled by this plugin,
+   OFFLOAD_TARGET_TYPE_NVIDIA_PTX.  */
+int
+GOMP_OFFLOAD_get_type (void)
+{
+ return OFFLOAD_TARGET_TYPE_NVIDIA_PTX;
+}
+
+/* Return the device count as reported by nvptx_get_num_devices.  */
+int
+GOMP_OFFLOAD_get_num_devices (void)
+{
+ return nvptx_get_num_devices ();
+}
+
+/* The image most recently passed to GOMP_OFFLOAD_register_image: its
+   target-side data and its host-side table.  Both stay NULL until an image
+   is registered; GOMP_OFFLOAD_get_table reads them.  */
+static void **kernel_target_data;
+static void **kernel_host_table;
+
+/* Remember HOST_TABLE and TARGET_DATA for later use.  Only the pointers are
+   stored, and a later call overwrites the earlier registration.  */
+
+void
+GOMP_OFFLOAD_register_image (void *host_table, void *target_data)
+{
+  kernel_host_table = host_table;
+  kernel_target_data = target_data;
+}
+
+/* Initialize the plugin by calling nvptx_init; the device number N is not
+   used and the result of nvptx_init is discarded.  */
+void
+GOMP_OFFLOAD_init_device (int n __attribute__ ((unused)))
+{
+ (void) nvptx_init ();
+}
+
+/* Shut the plugin down by calling nvptx_fini; the device number N is not
+   used.  */
+void
+GOMP_OFFLOAD_fini_device (int n __attribute__ ((unused)))
+{
+ nvptx_fini ();
+}
+
+/* Build the table that maps host function addresses of the registered image
+   to device function descriptors, and store it in *TABLEP.  Returns the
+   number of table entries, or 0 if nvptx_init does not return a positive
+   value or no image has been registered (in which case *TABLEP is left
+   untouched).  The device number N is not used.  Both the table and the
+   descriptor array are allocated with GOMP_PLUGIN_malloc and are not freed
+   here.  */
+
+int
+GOMP_OFFLOAD_get_table (int n __attribute__ ((unused)),
+                        struct mapping_table **tablep)
+{
+  CUmodule module;
+  struct mapping_table *table;
+  struct targ_fn_descriptor *descriptors;
+  void **host_fns;
+  char **kernel_names;
+  int count, idx;
+
+  if (nvptx_init () <= 0)
+    return 0;
+
+  /* This isn't an error, because an image may legitimately have no offloaded
+     regions and so will not call GOMP_offload_register.  */
+  if (kernel_target_data == NULL)
+    return 0;
+
+  /* Layout of the registered image:
+
+     kernel_target_data[0] -> ptx code
+     kernel_target_data[1] -> variable mappings
+     kernel_target_data[2] -> array of kernel names in ascii
+
+     kernel_host_table[0] -> start of function addresses
+                             (__offload_func_table)
+     kernel_host_table[1] -> end of function addresses
+                             (__offload_funcs_end)
+
+     The array of kernel names and the function addresses form a one-to-one
+     correspondence.  */
+
+  link_ptx (&module, kernel_target_data[0]);
+
+  host_fns = kernel_host_table[0];
+  kernel_names = (char **) kernel_target_data[2];
+  count = (kernel_host_table[1] - kernel_host_table[0]) / sizeof (void *);
+
+  table = GOMP_PLUGIN_malloc (sizeof (struct mapping_table) * count);
+  *tablep = table;
+  descriptors
+    = GOMP_PLUGIN_malloc (sizeof (struct targ_fn_descriptor) * count);
+
+  for (idx = 0; idx < count; idx++)
+    {
+      struct targ_fn_descriptor *desc = &descriptors[idx];
+      struct mapping_table *entry = &table[idx];
+      CUfunction function;
+      CUresult r;
+
+      r = cuModuleGetFunction (&function, module, kernel_names[idx]);
+      if (r != CUDA_SUCCESS)
+        GOMP_PLUGIN_fatal ("cuModuleGetFunction error: %s", cuda_error (r));
+
+      desc->fn = function;
+      desc->name = (const char *) kernel_names[idx];
+
+      /* Each function is entered as a one-byte range on both sides; the
+         device side points at the descriptor filled in above.  */
+      entry->host_start = (uintptr_t) host_fns[idx];
+      entry->host_end = entry->host_start + 1;
+      entry->tgt_start = (uintptr_t) desc;
+      entry->tgt_end = entry->tgt_start + 1;
+    }
+
+  return count;
+}
+
+/* Allocate SIZE bytes through nvptx_alloc and return its result.  The device
+   number N is not used.  */
+void *
+GOMP_OFFLOAD_alloc (int n __attribute__ ((unused)), size_t size)
+{
+ return nvptx_alloc (size);
+}
+
+/* Release PTR through nvptx_free.  The device number N is not used.  */
+void
+GOMP_OFFLOAD_free (int n __attribute__ ((unused)), void *ptr)
+{
+ nvptx_free (ptr);
+}
+
+/* Copy N bytes from device address SRC to host address DST by forwarding to
+   nvptx_dev2host, and return its result.  The device ordinal ORD is not
+   used.  */
+void *
+GOMP_OFFLOAD_dev2host (int ord __attribute__ ((unused)), void *dst,
+ const void *src, size_t n)
+{
+ return nvptx_dev2host (dst, src, n);
+}
+
+/* Copy N bytes from host address SRC to device address DST by forwarding to
+   nvptx_host2dev, and return its result.  The device ordinal ORD is not
+   used.  */
+void *
+GOMP_OFFLOAD_host2dev (int ord __attribute__ ((unused)), void *dst,
+ const void *src, size_t n)
+{
+ return nvptx_host2dev (dst, src, n);
+}
+
+/* Function pointer with external linkage, initialized to NULL.
+   NOTE(review): nothing in the visible part of this file assigns or calls
+   it; confirm whether it is still needed.  */
+void (*device_run) (int n, void *fn_ptr, void *vars) = NULL;
+
+/* OpenACC parallel hook: forward every argument unchanged, in the same
+   order, to nvptx_exec.  */
+void
+GOMP_OFFLOAD_openacc_parallel (void (*fn) (void *), size_t mapnum,
+ void **hostaddrs, void **devaddrs, size_t *sizes,
+ unsigned short *kinds, int num_gangs,
+ int num_workers, int vector_length, int async,
+ void *targ_mem_desc)
+{
+ nvptx_exec (fn, mapnum, hostaddrs, devaddrs, sizes, kinds, num_gangs,
+ num_workers, vector_length, async, targ_mem_desc);
+}
+
+/* Open device number N through nvptx_open_device and return its result.  */
+void *
+GOMP_OFFLOAD_openacc_open_device (int n)
+{
+ return nvptx_open_device (n);
+}
+
+/* Close the device identified by handle H through nvptx_close_device and
+   return its result.  */
+int
+GOMP_OFFLOAD_openacc_close_device (void *h)
+{
+ return nvptx_close_device (h);
+}
+
+/* Switch the calling thread to device number N, which must be non-negative
+   (asserted).  nvptx_open_device is only called when the thread has no
+   device yet or its current device has a different ordinal.
+   NOTE(review): the thread data pointer is dereferenced without a NULL
+   check, unlike in GOMP_OFFLOAD_openacc_get_device_num; confirm that
+   callers guarantee it exists.  */
+void
+GOMP_OFFLOAD_openacc_set_device_num (int n)
+{
+ struct nvptx_thread *nvthd = nvptx_thread ();
+
+ assert (n >= 0);
+
+ if (!nvthd->ptx_dev || nvthd->ptx_dev->ord != n)
+ (void) nvptx_open_device (n);
+}
+
+/* Return the ordinal of the device the calling thread is using, or -1 if
+   there is none.
+
+   This hook may run before the device has been "opened" for the current
+   thread, and then there is no way to tell which number to report.  Opening
+   the device here is not wanted, so -1 is returned and the caller
+   (oacc-init.c:acc_get_device_num) deals with it.  */
+
+int
+GOMP_OFFLOAD_openacc_get_device_num (void)
+{
+  struct nvptx_thread *nvthd = nvptx_thread ();
+
+  if (nvthd == NULL || nvthd->ptx_dev == NULL)
+    return -1;
+
+  return nvthd->ptx_dev->ord;
+}
+
+/* Record a new event on the calling thread's current stream and register it
+   with event_add as a PTX_EVT_ASYNC_CLEANUP event carrying TARG_MEM_DESC.
+   Failure to create or record the event is a fatal error.  */
+
+void
+GOMP_OFFLOAD_openacc_register_async_cleanup (void *targ_mem_desc)
+{
+  struct nvptx_thread *nvthd = nvptx_thread ();
+  CUevent *marker = (CUevent *) GOMP_PLUGIN_malloc (sizeof (CUevent));
+  CUresult status;
+
+  status = cuEventCreate (marker, CU_EVENT_DISABLE_TIMING);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuEventCreate error: %s", cuda_error (status));
+
+  status = cuEventRecord (*marker, nvthd->current_stream->stream);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (status));
+
+  event_add (PTX_EVT_ASYNC_CLEANUP, marker, targ_mem_desc);
+}
+
+/* Plugin hook: forward ASYNC to nvptx_async_test and return its result.  */
+int
+GOMP_OFFLOAD_openacc_async_test (int async)
+{
+ return nvptx_async_test (async);
+}
+
+/* Plugin hook: return the result of nvptx_async_test_all.  */
+int
+GOMP_OFFLOAD_openacc_async_test_all (void)
+{
+ return nvptx_async_test_all ();
+}
+
+/* Plugin hook: forward ASYNC to nvptx_wait.  */
+void
+GOMP_OFFLOAD_openacc_async_wait (int async)
+{
+ nvptx_wait (async);
+}
+
+/* Plugin hook: forward ASYNC1 and ASYNC2, in that order, to
+   nvptx_wait_async.  */
+void
+GOMP_OFFLOAD_openacc_async_wait_async (int async1, int async2)
+{
+ nvptx_wait_async (async1, async2);
+}
+
+/* Plugin hook: call nvptx_wait_all.  */
+void
+GOMP_OFFLOAD_openacc_async_wait_all (void)
+{
+ nvptx_wait_all ();
+}
+
+/* Plugin hook: forward ASYNC to nvptx_wait_all_async.  */
+void
+GOMP_OFFLOAD_openacc_async_wait_all_async (int async)
+{
+ nvptx_wait_all_async (async);
+}
+
+/* Plugin hook: forward ASYNC to nvptx_set_async.  */
+void
+GOMP_OFFLOAD_openacc_async_set_async (int async)
+{
+ nvptx_set_async (async);
+}
+
+/* Allocate and return the per-thread plugin data for the device described
+   by TARG_DATA (a struct ptx_device *).  If the calling thread has no
+   current CUDA context, the device's context is pushed for it.  The new
+   thread data starts out on the device's null stream.  The device must
+   already have a context (asserted).  */
+
+void *
+GOMP_OFFLOAD_openacc_create_thread_data (void *targ_data)
+{
+  struct ptx_device *dev = (struct ptx_device *) targ_data;
+  struct nvptx_thread *thread_data
+    = GOMP_PLUGIN_malloc (sizeof (struct nvptx_thread));
+  CUcontext current_ctx;
+  CUresult status;
+
+  status = cuCtxGetCurrent (&current_ctx);
+  if (status != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuCtxGetCurrent error: %s", cuda_error (status));
+
+  assert (dev->ctx);
+
+  if (current_ctx == NULL)
+    {
+      status = cuCtxPushCurrent (dev->ctx);
+      if (status != CUDA_SUCCESS)
+        GOMP_PLUGIN_fatal ("cuCtxPushCurrent error: %s", cuda_error (status));
+    }
+
+  thread_data->ptx_dev = dev;
+  thread_data->current_stream = dev->null_stream;
+
+  return (void *) thread_data;
+}
+
+/* Release the per-thread plugin data DATA with free.  */
+void
+GOMP_OFFLOAD_openacc_destroy_thread_data (void *data)
+{
+ free (data);
+}
+
+/* Plugin hook: return the result of nvptx_get_current_cuda_device.  */
+void *
+GOMP_OFFLOAD_openacc_get_current_cuda_device (void)
+{
+ return nvptx_get_current_cuda_device ();
+}
+
+/* Plugin hook: return the result of nvptx_get_current_cuda_context.  */
+void *
+GOMP_OFFLOAD_openacc_get_current_cuda_context (void)
+{
+ return nvptx_get_current_cuda_context ();
+}
+
+/* NOTE: This returns a CUstream, not a ptx_stream pointer. */
+
+/* Plugin hook: forward ASYNC to nvptx_get_cuda_stream and return its
+   result.  */
+void *
+GOMP_OFFLOAD_openacc_get_cuda_stream (int async)
+{
+ return nvptx_get_cuda_stream (async);
+}
+
+/* NOTE: This takes a CUstream, not a ptx_stream pointer. */
+
+/* Plugin hook: forward ASYNC and STREAM to nvptx_set_cuda_stream and return
+   its result.  */
+int
+GOMP_OFFLOAD_openacc_set_cuda_stream (int async, void *stream)
+{
+ return nvptx_set_cuda_stream (async, stream);
+}
diff --git a/libgomp/splay-tree.c b/libgomp/splay-tree.c
new file mode 100644
index 0000000..030ca8f
--- /dev/null
+++ b/libgomp/splay-tree.c
@@ -0,0 +1,217 @@
+/* A splay-tree datatype.
+ Copyright (C) 1998-2015 Free Software Foundation, Inc.
+ Contributed by Mark Mitchell (mark@markmitchell.com).
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* The splay tree code copied from include/splay-tree.h and adjusted,
+ so that all the data lives directly in splay_tree_node_s structure
+ and no extra allocations are needed. */
+
+/* For an easily readable description of splay-trees, see:
+
+ Lewis, Harry R. and Denenberg, Larry. Data Structures and Their
+ Algorithms. Harper-Collins, Inc. 1991.
+
+ The major feature of splay trees is that all basic tree operations
+ are amortized O(log n) time for a tree with n nodes. */
+
+#include "libgomp.h"
+#include "splay-tree.h"
+
+extern int splay_compare (splay_tree_key, splay_tree_key);
+
+/* Rotate the edge between P and its left child N, so that N takes P's place
+   and P becomes N's right child.  N's former right subtree becomes P's left
+   subtree.  PP is the slot that pointed to P (in P's parent, or the tree
+   root); it is updated to point to N.  */
+
+static inline void
+rotate_left (splay_tree_node *pp, splay_tree_node p, splay_tree_node n)
+{
+  p->left = n->right;
+  n->right = p;
+  *pp = n;
+}
+
+/* Rotate the edge between P and its right child N, so that N takes P's place
+   and P becomes N's left child.  N's former left subtree becomes P's right
+   subtree.  PP is the slot that pointed to P (in P's parent, or the tree
+   root); it is updated to point to N.  */
+
+static inline void
+rotate_right (splay_tree_node *pp, splay_tree_node p, splay_tree_node n)
+{
+  p->right = n->left;
+  n->left = p;
+  *pp = n;
+}
+
+/* Splay KEY to the root of SP.  On return the root is either the node whose
+   key compares equal to KEY or, if there is none, the last node reached
+   while searching for it.  An empty tree is left alone.  */
+
+static void
+splay_tree_splay (splay_tree sp, splay_tree_key key)
+{
+  if (sp->root == NULL)
+    return;
+
+  for (;;)
+    {
+      splay_tree_node n = sp->root;
+      splay_tree_node c;
+      int cmp1, cmp2;
+
+      cmp1 = splay_compare (key, &n->key);
+
+      /* KEY is already at the root.  */
+      if (cmp1 == 0)
+        return;
+
+      /* Step towards KEY; without a child on that side we are done.  */
+      c = (cmp1 < 0) ? n->left : n->right;
+      if (c == NULL)
+        return;
+
+      /* If the child matches, or the search cannot continue below it, one
+         rotation brings the child to the root and finishes the job.  */
+      cmp2 = splay_compare (key, &c->key);
+      if (cmp2 == 0
+          || (cmp2 < 0 && !c->left)
+          || (cmp2 > 0 && !c->right))
+        {
+          if (cmp1 < 0)
+            rotate_left (&sp->root, n, c);
+          else
+            rotate_right (&sp->root, n, c);
+          return;
+        }
+
+      /* Otherwise do one of the four double rotations.  Neither comparison
+         is zero at this point.  */
+      if (cmp1 < 0)
+        {
+          if (cmp2 < 0)
+            rotate_left (&n->left, c, c->left);
+          else
+            rotate_right (&n->left, c, c->right);
+          rotate_left (&sp->root, n, n->left);
+        }
+      else
+        {
+          if (cmp2 > 0)
+            rotate_right (&n->right, c, c->right);
+          else
+            rotate_left (&n->right, c, c->left);
+          rotate_right (&sp->root, n, n->right);
+        }
+    }
+}
+
+/* Insert NODE into SP as the new root.  NODE's key must not already be in
+   the tree; a duplicate is reported with gomp_fatal.  NODE's child pointers
+   are overwritten.  */
+
+attribute_hidden void
+splay_tree_insert (splay_tree sp, splay_tree_node node)
+{
+  splay_tree_node old_root;
+
+  splay_tree_splay (sp, &node->key);
+
+  old_root = sp->root;
+  if (old_root == NULL)
+    node->left = node->right = NULL;
+  else
+    {
+      int comparison = splay_compare (&old_root->key, &node->key);
+
+      if (comparison == 0)
+        {
+          gomp_fatal ("Duplicate node");
+          /* The tree is left unchanged for a duplicate.  */
+          return;
+        }
+
+      if (comparison < 0)
+        {
+          /* The old root sorts before NODE: it becomes the left child and
+             hands its right subtree over to NODE.  */
+          node->left = old_root;
+          node->right = old_root->right;
+          old_root->right = NULL;
+        }
+      else
+        {
+          /* The old root sorts after NODE: the mirror image.  */
+          node->right = old_root;
+          node->left = old_root->left;
+          old_root->left = NULL;
+        }
+    }
+
+  sp->root = node;
+}
+
+/* Unlink the node whose key compares equal to KEY from SP.  Nothing happens
+   if there is no such node.  The node itself is not freed.  */
+
+attribute_hidden void
+splay_tree_remove (splay_tree sp, splay_tree_key key)
+{
+  splay_tree_node victim, left, right, rightmost;
+
+  splay_tree_splay (sp, key);
+
+  victim = sp->root;
+  if (victim == NULL || splay_compare (&victim->key, key) != 0)
+    return;
+
+  left = victim->left;
+  right = victim->right;
+
+  /* One of the children becomes the new root.  It does not matter much
+     which, as long as the ordering of the tree is preserved.  */
+  if (left == NULL)
+    {
+      sp->root = right;
+      return;
+    }
+
+  sp->root = left;
+  if (right == NULL)
+    return;
+
+  /* Both children exist: hang the right subtree off the right-most node of
+     the left subtree.  */
+  rightmost = left;
+  while (rightmost->right != NULL)
+    rightmost = rightmost->right;
+  rightmost->right = right;
+}
+
+/* Look KEY up in SP.  Returns a pointer to the key stored in the matching
+   node, or NULL if no node compares equal to KEY.  The lookup splays the
+   tree, so it may change which node is the root.  */
+
+attribute_hidden splay_tree_key
+splay_tree_lookup (splay_tree sp, splay_tree_key key)
+{
+  splay_tree_node root;
+
+  splay_tree_splay (sp, key);
+
+  root = sp->root;
+  if (root == NULL || splay_compare (&root->key, key) != 0)
+    return NULL;
+
+  return &root->key;
+}
diff --git a/libgomp/splay-tree.h b/libgomp/splay-tree.h
index 1296be6..085021c 100644
--- a/libgomp/splay-tree.h
+++ b/libgomp/splay-tree.h
@@ -43,6 +43,9 @@ typedef struct splay_tree_key_s *splay_tree_key;
The major feature of splay trees is that all basic tree operations
are amortized O(log n) time for a tree with n nodes. */
+#ifndef _SPLAY_TREE_H
+#define _SPLAY_TREE_H 1
+
/* The nodes in the splay tree. */
struct splay_tree_node_s {
struct splay_tree_key_s key;
@@ -56,177 +59,8 @@ struct splay_tree_s {
splay_tree_node root;
};
-/* Rotate the edge joining the left child N with its parent P. PP is the
- grandparents' pointer to P. */
-
-static inline void
-rotate_left (splay_tree_node *pp, splay_tree_node p, splay_tree_node n)
-{
- splay_tree_node tmp;
- tmp = n->right;
- n->right = p;
- p->left = tmp;
- *pp = n;
-}
-
-/* Rotate the edge joining the right child N with its parent P. PP is the
- grandparents' pointer to P. */
-
-static inline void
-rotate_right (splay_tree_node *pp, splay_tree_node p, splay_tree_node n)
-{
- splay_tree_node tmp;
- tmp = n->left;
- n->left = p;
- p->right = tmp;
- *pp = n;
-}
-
-/* Bottom up splay of KEY. */
-
-static void
-splay_tree_splay (splay_tree sp, splay_tree_key key)
-{
- if (sp->root == NULL)
- return;
-
- do {
- int cmp1, cmp2;
- splay_tree_node n, c;
-
- n = sp->root;
- cmp1 = splay_compare (key, &n->key);
-
- /* Found. */
- if (cmp1 == 0)
- return;
-
- /* Left or right? If no child, then we're done. */
- if (cmp1 < 0)
- c = n->left;
- else
- c = n->right;
- if (!c)
- return;
-
- /* Next one left or right? If found or no child, we're done
- after one rotation. */
- cmp2 = splay_compare (key, &c->key);
- if (cmp2 == 0
- || (cmp2 < 0 && !c->left)
- || (cmp2 > 0 && !c->right))
- {
- if (cmp1 < 0)
- rotate_left (&sp->root, n, c);
- else
- rotate_right (&sp->root, n, c);
- return;
- }
-
- /* Now we have the four cases of double-rotation. */
- if (cmp1 < 0 && cmp2 < 0)
- {
- rotate_left (&n->left, c, c->left);
- rotate_left (&sp->root, n, n->left);
- }
- else if (cmp1 > 0 && cmp2 > 0)
- {
- rotate_right (&n->right, c, c->right);
- rotate_right (&sp->root, n, n->right);
- }
- else if (cmp1 < 0 && cmp2 > 0)
- {
- rotate_right (&n->left, c, c->right);
- rotate_left (&sp->root, n, n->left);
- }
- else if (cmp1 > 0 && cmp2 < 0)
- {
- rotate_left (&n->right, c, c->left);
- rotate_right (&sp->root, n, n->right);
- }
- } while (1);
-}
-
-/* Insert a new NODE into SP. The NODE shouldn't exist in the tree. */
-
-static void
-splay_tree_insert (splay_tree sp, splay_tree_node node)
-{
- int comparison = 0;
-
- splay_tree_splay (sp, &node->key);
-
- if (sp->root)
- comparison = splay_compare (&sp->root->key, &node->key);
-
- if (sp->root && comparison == 0)
- abort ();
- else
- {
- /* Insert it at the root. */
- if (sp->root == NULL)
- node->left = node->right = NULL;
- else if (comparison < 0)
- {
- node->left = sp->root;
- node->right = node->left->right;
- node->left->right = NULL;
- }
- else
- {
- node->right = sp->root;
- node->left = node->right->left;
- node->right->left = NULL;
- }
-
- sp->root = node;
- }
-}
-
-/* Remove node with KEY from SP. It is not an error if it did not exist. */
-
-static void
-splay_tree_remove (splay_tree sp, splay_tree_key key)
-{
- splay_tree_splay (sp, key);
-
- if (sp->root && splay_compare (&sp->root->key, key) == 0)
- {
- splay_tree_node left, right;
-
- left = sp->root->left;
- right = sp->root->right;
-
- /* One of the children is now the root. Doesn't matter much
- which, so long as we preserve the properties of the tree. */
- if (left)
- {
- sp->root = left;
-
- /* If there was a right child as well, hang it off the
- right-most leaf of the left child. */
- if (right)
- {
- while (left->right)
- left = left->right;
- left->right = right;
- }
- }
- else
- sp->root = right;
- }
-}
-
-/* Lookup KEY in SP, returning NODE if present, and NULL
- otherwise. */
-
-static splay_tree_key
-splay_tree_lookup (splay_tree sp, splay_tree_key key)
-{
- splay_tree_splay (sp, key);
+extern splay_tree_key splay_tree_lookup (splay_tree, splay_tree_key);
+extern void splay_tree_insert (splay_tree, splay_tree_node);
+extern void splay_tree_remove (splay_tree, splay_tree_key);
- if (sp->root && splay_compare (&sp->root->key, key) == 0)
- return &sp->root->key;
- else
- return NULL;
-}
+#endif /* _SPLAY_TREE_H */
diff --git a/libgomp/target.c b/libgomp/target.c
index ec097de..83ad511 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -27,11 +27,14 @@
#include "config.h"
#include "libgomp.h"
-#include "libgomp_target.h"
+#include "oacc-plugin.h"
+#include "oacc-int.h"
+#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
+#include <assert.h>
#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
@@ -39,52 +42,9 @@
static void gomp_target_init (void);
+/* The whole initialization code for offloading plugins is only run once. */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
-/* Forward declaration for a node in the tree. */
-typedef struct splay_tree_node_s *splay_tree_node;
-typedef struct splay_tree_s *splay_tree;
-typedef struct splay_tree_key_s *splay_tree_key;
-
-struct target_mem_desc {
- /* Reference count. */
- uintptr_t refcount;
- /* All the splay nodes allocated together. */
- splay_tree_node array;
- /* Start of the target region. */
- uintptr_t tgt_start;
- /* End of the targer region. */
- uintptr_t tgt_end;
- /* Handle to free. */
- void *to_free;
- /* Previous target_mem_desc. */
- struct target_mem_desc *prev;
- /* Number of items in following list. */
- size_t list_count;
-
- /* Corresponding target device descriptor. */
- struct gomp_device_descr *device_descr;
-
- /* List of splay keys to remove (or decrease refcount)
- at the end of region. */
- splay_tree_key list[];
-};
-
-struct splay_tree_key_s {
- /* Address of the host object. */
- uintptr_t host_start;
- /* Address immediately after the host object. */
- uintptr_t host_end;
- /* Descriptor of the target memory. */
- struct target_mem_desc *tgt;
- /* Offset from tgt->tgt_start to the start of the target object. */
- uintptr_t tgt_offset;
- /* Reference count. */
- uintptr_t refcount;
- /* True if data should be copied from device to host at the end. */
- bool copy_from;
-};
-
/* This structure describes an offload image.
It contains type of the target device, pointer to host table descriptor, and
pointer to target data. */
@@ -106,9 +66,12 @@ static struct gomp_device_descr *devices;
/* Total number of available devices. */
static int num_devices;
+/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
+static int num_devices_openmp;
+
/* The comparison function. */
-static int
+attribute_hidden int
splay_compare (splay_tree_key x, splay_tree_key y)
{
if (x->host_start == x->host_end
@@ -123,54 +86,23 @@ splay_compare (splay_tree_key x, splay_tree_key y)
#include "splay-tree.h"
-/* This structure describes accelerator device.
- It contains ID-number of the device, its type, function handlers for
- interaction with the device, and information about mapped memory. */
-struct gomp_device_descr
+attribute_hidden void
+gomp_init_targets_once (void)
{
- /* This is the ID number of device. It could be specified in DEVICE-clause of
- TARGET construct. */
- int id;
-
- /* This is the ID number of device among devices of the same type. */
- int target_id;
-
- /* This is the TYPE of device. */
- enum offload_target_type type;
-
- /* Set to true when device is initialized. */
- bool is_initialized;
-
- /* Function handlers. */
- int (*get_type_func) (void);
- int (*get_num_devices_func) (void);
- void (*register_image_func) (void *, void *);
- void (*init_device_func) (int);
- int (*get_table_func) (int, void *);
- void *(*alloc_func) (int, size_t);
- void (*free_func) (int, void *);
- void *(*host2dev_func) (int, void *, const void *, size_t);
- void *(*dev2host_func) (int, void *, const void *, size_t);
- void (*run_func) (int, void *, void *);
-
- /* Splay tree containing information about mapped memory regions. */
- struct splay_tree_s dev_splay_tree;
-
- /* Mutex for operating with the splay tree and other shared structures. */
- gomp_mutex_t dev_env_lock;
-};
+ (void) pthread_once (&gomp_is_initialized, gomp_target_init);
+}
attribute_hidden int
gomp_get_num_devices (void)
{
- (void) pthread_once (&gomp_is_initialized, gomp_target_init);
- return num_devices;
+ gomp_init_targets_once ();
+ return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device_id)
{
- if (device_id == -1)
+ if (device_id == GOMP_DEVICE_ICV)
{
struct gomp_task_icv *icv = gomp_icv (false);
device_id = icv->default_device_var;
@@ -190,27 +122,39 @@ static inline void
gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn,
unsigned char kind)
{
- if (oldn->host_start > newn->host_start
+ if ((kind & GOMP_MAP_FLAG_FORCE)
+ || oldn->host_start > newn->host_start
|| oldn->host_end < newn->host_end)
- gomp_fatal ("Trying to map into device [%p..%p) object when"
+ gomp_fatal ("Trying to map into device [%p..%p) object when "
"[%p..%p) is already mapped",
(void *) newn->host_start, (void *) newn->host_end,
(void *) oldn->host_start, (void *) oldn->host_end);
oldn->refcount++;
}
-static struct target_mem_desc *
+static int
+get_kind (bool is_openacc, void *kinds, int idx)
+{
+ return is_openacc ? ((unsigned short *) kinds)[idx]
+ : ((unsigned char *) kinds)[idx];
+}
+
+attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
- void **hostaddrs, size_t *sizes, unsigned char *kinds,
- bool is_target)
+ void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
+ bool is_openacc, bool is_target)
{
size_t i, tgt_align, tgt_size, not_found_cnt = 0;
+ const int rshift = is_openacc ? 8 : 3;
+ const int typemask = is_openacc ? 0xff : 0x7;
+ struct gomp_memory_mapping *mm = &devicep->mem_map;
struct splay_tree_key_s cur_node;
struct target_mem_desc *tgt
= gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
tgt->list_count = mapnum;
tgt->refcount = 1;
tgt->device_descr = devicep;
+ tgt->mem_map = mm;
if (mapnum == 0)
return tgt;
@@ -224,40 +168,43 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
tgt_size = mapnum * sizeof (void *);
}
- gomp_mutex_lock (&devicep->dev_env_lock);
+ gomp_mutex_lock (&mm->lock);
+
for (i = 0; i < mapnum; i++)
{
+ int kind = get_kind (is_openacc, kinds, i);
if (hostaddrs[i] == NULL)
{
tgt->list[i] = NULL;
continue;
}
cur_node.host_start = (uintptr_t) hostaddrs[i];
- if ((kinds[i] & 7) != 4)
+ if (!GOMP_MAP_POINTER_P (kind & typemask))
cur_node.host_end = cur_node.host_start + sizes[i];
else
cur_node.host_end = cur_node.host_start + sizeof (void *);
- splay_tree_key n = splay_tree_lookup (&devicep->dev_splay_tree,
- &cur_node);
+ splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node);
if (n)
{
tgt->list[i] = n;
- gomp_map_vars_existing (n, &cur_node, kinds[i]);
+ gomp_map_vars_existing (n, &cur_node, kind & typemask);
}
else
{
- size_t align = (size_t) 1 << (kinds[i] >> 3);
tgt->list[i] = NULL;
+
+ size_t align = (size_t) 1 << (kind >> rshift);
not_found_cnt++;
if (tgt_align < align)
tgt_align = align;
tgt_size = (tgt_size + align - 1) & ~(align - 1);
tgt_size += cur_node.host_end - cur_node.host_start;
- if ((kinds[i] & 7) == 5)
+ if ((kind & typemask) == GOMP_MAP_TO_PSET)
{
size_t j;
for (j = i + 1; j < mapnum; j++)
- if ((kinds[j] & 7) != 4)
+ if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
+ & typemask))
break;
else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
|| ((uintptr_t) hostaddrs[j] + sizeof (void *)
@@ -272,7 +219,15 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
}
}
- if (not_found_cnt || is_target)
+ if (devaddrs)
+ {
+ if (mapnum != 1)
+ gomp_fatal ("unexpected aggregation");
+ tgt->to_free = devaddrs[0];
+ tgt->tgt_start = (uintptr_t) tgt->to_free;
+ tgt->tgt_end = tgt->tgt_start + sizes[0];
+ }
+ else if (not_found_cnt || is_target)
{
/* Allocate tgt_align aligned tgt_size block of memory. */
/* FIXME: Perhaps change interface to allocate properly aligned
@@ -304,44 +259,47 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
for (i = 0; i < mapnum; i++)
if (tgt->list[i] == NULL)
{
+ int kind = get_kind (is_openacc, kinds, i);
if (hostaddrs[i] == NULL)
continue;
splay_tree_key k = &array->key;
k->host_start = (uintptr_t) hostaddrs[i];
- if ((kinds[i] & 7) != 4)
+ if (!GOMP_MAP_POINTER_P (kind & typemask))
k->host_end = k->host_start + sizes[i];
else
k->host_end = k->host_start + sizeof (void *);
- splay_tree_key n
- = splay_tree_lookup (&devicep->dev_splay_tree, k);
+ splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k);
if (n)
{
tgt->list[i] = n;
- gomp_map_vars_existing (n, k, kinds[i]);
+ gomp_map_vars_existing (n, k, kind & typemask);
}
else
{
- size_t align = (size_t) 1 << (kinds[i] >> 3);
+ size_t align = (size_t) 1 << (kind >> rshift);
tgt->list[i] = k;
tgt_size = (tgt_size + align - 1) & ~(align - 1);
k->tgt = tgt;
k->tgt_offset = tgt_size;
tgt_size += k->host_end - k->host_start;
- k->copy_from = false;
- if ((kinds[i] & 7) == 2 || (kinds[i] & 7) == 3)
- k->copy_from = true;
+ k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
k->refcount = 1;
+ k->async_refcount = 0;
tgt->refcount++;
array->left = NULL;
array->right = NULL;
- splay_tree_insert (&devicep->dev_splay_tree, array);
- switch (kinds[i] & 7)
+ splay_tree_insert (&mm->splay_tree, array);
+ switch (kind & typemask)
{
- case 0: /* ALLOC */
- case 2: /* FROM */
+ case GOMP_MAP_ALLOC:
+ case GOMP_MAP_FROM:
+ case GOMP_MAP_FORCE_ALLOC:
+ case GOMP_MAP_FORCE_FROM:
break;
- case 1: /* TO */
- case 3: /* TOFROM */
+ case GOMP_MAP_TO:
+ case GOMP_MAP_TOFROM:
+ case GOMP_MAP_FORCE_TO:
+ case GOMP_MAP_FORCE_TOFROM:
/* FIXME: Perhaps add some smarts, like if copying
several adjacent fields from host to target, use some
host buffer to avoid sending each var individually. */
@@ -351,12 +309,13 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
(void *) k->host_start,
k->host_end - k->host_start);
break;
- case 4: /* POINTER */
+ case GOMP_MAP_POINTER:
cur_node.host_start
= (uintptr_t) *(void **) k->host_start;
if (cur_node.host_start == (uintptr_t) NULL)
{
cur_node.tgt_offset = (uintptr_t) NULL;
+ /* FIXME: see above FIXME comment. */
devicep->host2dev_func (devicep->target_id,
(void *) (tgt->tgt_start
+ k->tgt_offset),
@@ -367,19 +326,16 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
/* Add bias to the pointer value. */
cur_node.host_start += sizes[i];
cur_node.host_end = cur_node.host_start + 1;
- n = splay_tree_lookup (&devicep->dev_splay_tree,
- &cur_node);
+ n = splay_tree_lookup (&mm->splay_tree, &cur_node);
if (n == NULL)
{
/* Could be possibly zero size array section. */
cur_node.host_end--;
- n = splay_tree_lookup (&devicep->dev_splay_tree,
- &cur_node);
+ n = splay_tree_lookup (&mm->splay_tree, &cur_node);
if (n == NULL)
{
cur_node.host_start--;
- n = splay_tree_lookup (&devicep->dev_splay_tree,
- &cur_node);
+ n = splay_tree_lookup (&mm->splay_tree, &cur_node);
cur_node.host_start++;
}
}
@@ -393,20 +349,24 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
array section. Now subtract bias to get what we want
to initialize the pointer with. */
cur_node.tgt_offset -= sizes[i];
+ /* FIXME: see above FIXME comment. */
devicep->host2dev_func (devicep->target_id,
(void *) (tgt->tgt_start
+ k->tgt_offset),
(void *) &cur_node.tgt_offset,
sizeof (void *));
break;
- case 5: /* TO_PSET */
+ case GOMP_MAP_TO_PSET:
+ /* FIXME: see above FIXME comment. */
devicep->host2dev_func (devicep->target_id,
(void *) (tgt->tgt_start
+ k->tgt_offset),
(void *) k->host_start,
k->host_end - k->host_start);
+
for (j = i + 1; j < mapnum; j++)
- if ((kinds[j] & 7) != 4)
+ if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
+ & typemask))
break;
else if ((uintptr_t) hostaddrs[j] < k->host_start
|| ((uintptr_t) hostaddrs[j] + sizeof (void *)
@@ -421,6 +381,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
if (cur_node.host_start == (uintptr_t) NULL)
{
cur_node.tgt_offset = (uintptr_t) NULL;
+ /* FIXME: see above FIXME comment. */
devicep->host2dev_func (devicep->target_id,
(void *) (tgt->tgt_start + k->tgt_offset
+ ((uintptr_t) hostaddrs[j]
@@ -433,19 +394,18 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
/* Add bias to the pointer value. */
cur_node.host_start += sizes[j];
cur_node.host_end = cur_node.host_start + 1;
- n = splay_tree_lookup (&devicep->dev_splay_tree,
- &cur_node);
+ n = splay_tree_lookup (&mm->splay_tree, &cur_node);
if (n == NULL)
{
/* Could be possibly zero size array section. */
cur_node.host_end--;
- n = splay_tree_lookup (&devicep->dev_splay_tree,
+ n = splay_tree_lookup (&mm->splay_tree,
&cur_node);
if (n == NULL)
{
cur_node.host_start--;
- n = splay_tree_lookup
- (&devicep->dev_splay_tree, &cur_node);
+ n = splay_tree_lookup (&mm->splay_tree,
+ &cur_node);
cur_node.host_start++;
}
}
@@ -460,6 +420,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
array section. Now subtract bias to get what we
want to initialize the pointer with. */
cur_node.tgt_offset -= sizes[j];
+ /* FIXME: see above FIXME comment. */
devicep->host2dev_func (devicep->target_id,
(void *) (tgt->tgt_start + k->tgt_offset
+ ((uintptr_t) hostaddrs[j]
@@ -468,12 +429,35 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
sizeof (void *));
i++;
}
- break;
+ break;
+ case GOMP_MAP_FORCE_PRESENT:
+ {
+ /* We already looked up the memory region above and it
+ was missing. */
+ size_t size = k->host_end - k->host_start;
+ gomp_fatal ("present clause: !acc_is_present (%p, "
+ "%zd (0x%zx))", (void *) k->host_start,
+ size, size);
+ }
+ break;
+ case GOMP_MAP_FORCE_DEVICEPTR:
+ assert (k->host_end - k->host_start == sizeof (void *));
+
+ devicep->host2dev_func (devicep->target_id,
+ (void *) (tgt->tgt_start
+ + k->tgt_offset),
+ (void *) k->host_start,
+ sizeof (void *));
+ break;
+ default:
+ gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
+ kind);
}
array++;
}
}
}
+
if (is_target)
{
for (i = 0; i < mapnum; i++)
@@ -483,6 +467,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
else
cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
+ tgt->list[i]->tgt_offset;
+ /* FIXME: see above FIXME comment. */
devicep->host2dev_func (devicep->target_id,
(void *) (tgt->tgt_start
+ i * sizeof (void *)),
@@ -491,7 +476,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
}
}
- gomp_mutex_unlock (&devicep->dev_env_lock);
+ gomp_mutex_unlock (&mm->lock);
return tgt;
}
@@ -506,10 +491,50 @@ gomp_unmap_tgt (struct target_mem_desc *tgt)
free (tgt);
}
-static void
-gomp_unmap_vars (struct target_mem_desc *tgt)
+/* Decrease the refcount for a set of mapped variables, and queue asynchronous
+ copies from the device back to the host after any work that has been issued.
+ Because the regions are still "live", increment an asynchronous reference
+ count to indicate that they should not be unmapped from host-side data
+ structures until the asynchronous copy has completed. */
+
+attribute_hidden void
+gomp_copy_from_async (struct target_mem_desc *tgt)
+{
+ struct gomp_device_descr *devicep = tgt->device_descr;
+ struct gomp_memory_mapping *mm = tgt->mem_map;
+ size_t i;
+
+ gomp_mutex_lock (&mm->lock);
+
+ for (i = 0; i < tgt->list_count; i++)
+ if (tgt->list[i] == NULL)
+ ;
+ else if (tgt->list[i]->refcount > 1)
+ {
+ tgt->list[i]->refcount--;
+ tgt->list[i]->async_refcount++;
+ }
+ else
+ {
+ splay_tree_key k = tgt->list[i];
+ if (k->copy_from)
+ devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
+ (void *) (k->tgt->tgt_start + k->tgt_offset),
+ k->host_end - k->host_start);
+ }
+
+ gomp_mutex_unlock (&mm->lock);
+}
+
+/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
+ variables back from device to host: if it is false, it is assumed that this
+ has been done already, i.e. by gomp_copy_from_async above. */
+
+attribute_hidden void
+gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
struct gomp_device_descr *devicep = tgt->device_descr;
+ struct gomp_memory_mapping *mm = tgt->mem_map;
if (tgt->list_count == 0)
{
@@ -517,21 +542,24 @@ gomp_unmap_vars (struct target_mem_desc *tgt)
return;
}
+ gomp_mutex_lock (&mm->lock);
+
size_t i;
- gomp_mutex_lock (&devicep->dev_env_lock);
for (i = 0; i < tgt->list_count; i++)
if (tgt->list[i] == NULL)
;
else if (tgt->list[i]->refcount > 1)
tgt->list[i]->refcount--;
+ else if (tgt->list[i]->async_refcount > 0)
+ tgt->list[i]->async_refcount--;
else
{
splay_tree_key k = tgt->list[i];
- if (k->copy_from)
+ if (k->copy_from && do_copyfrom)
devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
(void *) (k->tgt->tgt_start + k->tgt_offset),
k->host_end - k->host_start);
- splay_tree_remove (&devicep->dev_splay_tree, k);
+ splay_tree_remove (&mm->splay_tree, k);
if (k->tgt->refcount > 1)
k->tgt->refcount--;
else
@@ -542,15 +570,18 @@ gomp_unmap_vars (struct target_mem_desc *tgt)
tgt->refcount--;
else
gomp_unmap_tgt (tgt);
- gomp_mutex_unlock (&devicep->dev_env_lock);
+
+ gomp_mutex_unlock (&mm->lock);
}
static void
-gomp_update (struct gomp_device_descr *devicep, size_t mapnum,
- void **hostaddrs, size_t *sizes, unsigned char *kinds)
+gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
+ size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
+ bool is_openacc)
{
size_t i;
struct splay_tree_key_s cur_node;
+ const int typemask = is_openacc ? 0xff : 0x7;
if (!devicep)
return;
@@ -558,16 +589,17 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum,
if (mapnum == 0)
return;
- gomp_mutex_lock (&devicep->dev_env_lock);
+ gomp_mutex_lock (&mm->lock);
for (i = 0; i < mapnum; i++)
if (sizes[i])
{
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = cur_node.host_start + sizes[i];
- splay_tree_key n = splay_tree_lookup (&devicep->dev_splay_tree,
+ splay_tree_key n = splay_tree_lookup (&mm->splay_tree,
&cur_node);
if (n)
{
+ int kind = get_kind (is_openacc, kinds, i);
if (n->host_start > cur_node.host_start
|| n->host_end < cur_node.host_end)
gomp_fatal ("Trying to update [%p..%p) object when"
@@ -576,7 +608,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum,
(void *) cur_node.host_end,
(void *) n->host_start,
(void *) n->host_end);
- if ((kinds[i] & 7) == 1)
+ if (GOMP_MAP_COPY_TO_P (kind & typemask))
devicep->host2dev_func (devicep->target_id,
(void *) (n->tgt->tgt_start
+ n->tgt_offset
@@ -584,7 +616,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum,
- n->host_start),
(void *) cur_node.host_start,
cur_node.host_end - cur_node.host_start);
- else if ((kinds[i] & 7) == 2)
+ if (GOMP_MAP_COPY_FROM_P (kind & typemask))
devicep->dev2host_func (devicep->target_id,
(void *) cur_node.host_start,
(void *) (n->tgt->tgt_start
@@ -598,7 +630,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum,
(void *) cur_node.host_start,
(void *) cur_node.host_end);
}
- gomp_mutex_unlock (&devicep->dev_env_lock);
+ gomp_mutex_unlock (&mm->lock);
}
/* This function should be called from every offload image.
@@ -620,13 +652,23 @@ GOMP_offload_register (void *host_table, enum offload_target_type target_type,
num_offload_images++;
}
-/* This function initializes the target device, specified by DEVICEP. */
+/* This function initializes the target device, specified by DEVICEP. DEVICEP
+ must be locked on entry, and remains locked on return. */
-static void
+attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
devicep->init_device_func (devicep->target_id);
+ devicep->is_initialized = true;
+}
+/* Initialize address mapping tables. MM must be locked on entry, and remains
+ locked on return. */
+
+attribute_hidden void
+gomp_init_tables (struct gomp_device_descr *devicep,
+ struct gomp_memory_mapping *mm)
+{
/* Get address mapping table for device. */
struct mapping_table *table = NULL;
int num_entries = devicep->get_table_func (devicep->target_id, &table);
@@ -653,30 +695,63 @@ gomp_init_device (struct gomp_device_descr *devicep)
k->tgt = tgt;
node->left = NULL;
node->right = NULL;
- splay_tree_insert (&devicep->dev_splay_tree, node);
+ splay_tree_insert (&mm->splay_tree, node);
}
free (table);
- devicep->is_initialized = true;
+ mm->is_initialized = true;
+}
+
+/* Free address mapping tables. MM must be locked on entry, and remains locked
+ on return. */
+
+attribute_hidden void
+gomp_free_memmap (struct gomp_memory_mapping *mm)
+{
+ while (mm->splay_tree.root)
+ {
+ struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt;
+
+ splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key);
+ free (tgt->array);
+ free (tgt);
+ }
+
+ mm->is_initialized = false;
+}
+
+/* This function de-initializes the target device, specified by DEVICEP.
+ DEVICEP must be locked on entry, and remains locked on return. */
+
+attribute_hidden void
+gomp_fini_device (struct gomp_device_descr *devicep)
+{
+ if (devicep->is_initialized)
+ devicep->fini_device_func (devicep->target_id);
+
+ devicep->is_initialized = false;
}
/* Called when encountering a target directive. If DEVICE
- is -1, it means use device-var ICV. If it is -2 (or any other value
- larger than last available hw device, use host fallback.
- FN is address of host code, OPENMP_TARGET contains value of the
- __OPENMP_TARGET__ symbol in the shared library or binary that invokes
+ is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
+ GOMP_DEVICE_HOST_FALLBACK (or any value
+ larger than last available hw device), use host fallback.
+ FN is address of host code, OFFLOAD_TABLE contains value of the
+ __OFFLOAD_TABLE__ symbol in the shared library or binary that invokes
GOMP_target. HOSTADDRS, SIZES and KINDS are arrays
with MAPNUM entries, with addresses of the host objects,
sizes of the host objects (resp. for pointer kind pointer bias
and assumed sizeof (void *) size) and kinds. */
void
-GOMP_target (int device, void (*fn) (void *), const void *openmp_target,
+GOMP_target (int device, void (*fn) (void *), const void *offload_table,
size_t mapnum, void **hostaddrs, size_t *sizes,
unsigned char *kinds)
{
struct gomp_device_descr *devicep = resolve_device (device);
- if (devicep == NULL)
+
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
{
/* Host fallback. */
struct gomp_thread old_thr, *thr = gomp_thread ();
@@ -693,20 +768,38 @@ GOMP_target (int device, void (*fn) (void *), const void *openmp_target,
return;
}
- gomp_mutex_lock (&devicep->dev_env_lock);
+ gomp_mutex_lock (&devicep->lock);
if (!devicep->is_initialized)
gomp_init_device (devicep);
+ gomp_mutex_unlock (&devicep->lock);
+
+ void *fn_addr;
+
+ if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
+ fn_addr = (void *) fn;
+ else
+ {
+ struct gomp_memory_mapping *mm = &devicep->mem_map;
+ gomp_mutex_lock (&mm->lock);
+
+ if (!mm->is_initialized)
+ gomp_init_tables (devicep, mm);
+
+ struct splay_tree_key_s k;
+ k.host_start = (uintptr_t) fn;
+ k.host_end = k.host_start + 1;
+ splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k);
+ if (tgt_fn == NULL)
+ gomp_fatal ("Target function wasn't mapped");
- struct splay_tree_key_s k;
- k.host_start = (uintptr_t) fn;
- k.host_end = k.host_start + 1;
- splay_tree_key tgt_fn = splay_tree_lookup (&devicep->dev_splay_tree, &k);
- if (tgt_fn == NULL)
- gomp_fatal ("Target function wasn't mapped");
- gomp_mutex_unlock (&devicep->dev_env_lock);
+ gomp_mutex_unlock (&mm->lock);
+
+ fn_addr = (void *) tgt_fn->tgt->tgt_start;
+ }
struct target_mem_desc *tgt_vars
- = gomp_map_vars (devicep, mapnum, hostaddrs, sizes, kinds, true);
+ = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
+ true);
struct gomp_thread old_thr, *thr = gomp_thread ();
old_thr = *thr;
memset (thr, '\0', sizeof (*thr));
@@ -715,19 +808,20 @@ GOMP_target (int device, void (*fn) (void *), const void *openmp_target,
thr->place = old_thr.place;
thr->ts.place_partition_len = gomp_places_list_len;
}
- devicep->run_func (devicep->target_id, (void *) tgt_fn->tgt->tgt_start,
- (void *) tgt_vars->tgt_start);
+ devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
gomp_free_thread (thr);
*thr = old_thr;
- gomp_unmap_vars (tgt_vars);
+ gomp_unmap_vars (tgt_vars, true);
}
void
-GOMP_target_data (int device, const void *openmp_target, size_t mapnum,
+GOMP_target_data (int device, const void *offload_table, size_t mapnum,
void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
struct gomp_device_descr *devicep = resolve_device (device);
- if (devicep == NULL)
+
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
{
/* Host fallback. */
struct gomp_task_icv *icv = gomp_icv (false);
@@ -738,20 +832,27 @@ GOMP_target_data (int device, const void *openmp_target, size_t mapnum,
new #pragma omp target data, otherwise GOMP_target_end_data
would get out of sync. */
struct target_mem_desc *tgt
- = gomp_map_vars (NULL, 0, NULL, NULL, NULL, false);
+ = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
tgt->prev = icv->target_data;
icv->target_data = tgt;
}
return;
}
- gomp_mutex_lock (&devicep->dev_env_lock);
+ gomp_mutex_lock (&devicep->lock);
if (!devicep->is_initialized)
gomp_init_device (devicep);
- gomp_mutex_unlock (&devicep->dev_env_lock);
+ gomp_mutex_unlock (&devicep->lock);
+
+ struct gomp_memory_mapping *mm = &devicep->mem_map;
+ gomp_mutex_lock (&mm->lock);
+ if (!mm->is_initialized)
+ gomp_init_tables (devicep, mm);
+ gomp_mutex_unlock (&mm->lock);
struct target_mem_desc *tgt
- = gomp_map_vars (devicep, mapnum, hostaddrs, sizes, kinds, false);
+ = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
+ false);
struct gomp_task_icv *icv = gomp_icv (true);
tgt->prev = icv->target_data;
icv->target_data = tgt;
@@ -765,24 +866,32 @@ GOMP_target_end_data (void)
{
struct target_mem_desc *tgt = icv->target_data;
icv->target_data = tgt->prev;
- gomp_unmap_vars (tgt);
+ gomp_unmap_vars (tgt, true);
}
}
void
-GOMP_target_update (int device, const void *openmp_target, size_t mapnum,
+GOMP_target_update (int device, const void *offload_table, size_t mapnum,
void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
struct gomp_device_descr *devicep = resolve_device (device);
- if (devicep == NULL)
+
+ if (devicep == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
return;
- gomp_mutex_lock (&devicep->dev_env_lock);
+ gomp_mutex_lock (&devicep->lock);
if (!devicep->is_initialized)
gomp_init_device (devicep);
- gomp_mutex_unlock (&devicep->dev_env_lock);
+ gomp_mutex_unlock (&devicep->lock);
+
+ struct gomp_memory_mapping *mm = &devicep->mem_map;
+ gomp_mutex_lock (&mm->lock);
+ if (!mm->is_initialized)
+ gomp_init_tables (devicep, mm);
+ gomp_mutex_unlock (&mm->lock);
- gomp_update (devicep, mapnum, hostaddrs, sizes, kinds);
+ gomp_update (devicep, mm, mapnum, hostaddrs, sizes, kinds, false);
}
void
@@ -808,54 +917,137 @@ static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
const char *plugin_name)
{
+ char *err = NULL, *last_missing = NULL;
+ int optional_present, optional_total;
+
+ /* Clear any existing error. */
+ dlerror ();
+
void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
if (!plugin_handle)
- return false;
+ {
+ err = dlerror ();
+ goto out;
+ }
/* Check if all required functions are available in the plugin and store
their handlers. */
-#define DLSYM(f) \
- do \
- { \
- device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_"#f); \
- if (!device->f##_func) \
- return false; \
- } \
+#define DLSYM(f) \
+ do \
+ { \
+ device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \
+ err = dlerror (); \
+ if (err != NULL) \
+ goto out; \
+ } \
while (0)
+ /* Similar, but missing functions are not an error. */
+#define DLSYM_OPT(f, n) \
+ do \
+ { \
+ char *tmp_err; \
+ device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \
+ tmp_err = dlerror (); \
+ if (tmp_err == NULL) \
+ optional_present++; \
+ else \
+ last_missing = #n; \
+ optional_total++; \
+ } \
+ while (0)
+
+ DLSYM (get_name);
+ DLSYM (get_caps);
DLSYM (get_type);
DLSYM (get_num_devices);
DLSYM (register_image);
DLSYM (init_device);
+ DLSYM (fini_device);
DLSYM (get_table);
DLSYM (alloc);
DLSYM (free);
DLSYM (dev2host);
DLSYM (host2dev);
- DLSYM (run);
+ device->capabilities = device->get_caps_func ();
+ if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ DLSYM (run);
+ if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
+ {
+ optional_present = optional_total = 0;
+ DLSYM_OPT (openacc.exec, openacc_parallel);
+ DLSYM_OPT (openacc.open_device, openacc_open_device);
+ DLSYM_OPT (openacc.close_device, openacc_close_device);
+ DLSYM_OPT (openacc.get_device_num, openacc_get_device_num);
+ DLSYM_OPT (openacc.set_device_num, openacc_set_device_num);
+ DLSYM_OPT (openacc.register_async_cleanup,
+ openacc_register_async_cleanup);
+ DLSYM_OPT (openacc.async_test, openacc_async_test);
+ DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
+ DLSYM_OPT (openacc.async_wait, openacc_async_wait);
+ DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
+ DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
+ DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
+ DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
+ DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
+ DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
+ /* Require all the OpenACC handlers if we have
+ GOMP_OFFLOAD_CAP_OPENACC_200. */
+ if (optional_present != optional_total)
+ {
+ err = "plugin missing OpenACC handler function";
+ goto out;
+ }
+ optional_present = optional_total = 0;
+ DLSYM_OPT (openacc.cuda.get_current_device,
+ openacc_get_current_cuda_device);
+ DLSYM_OPT (openacc.cuda.get_current_context,
+ openacc_get_current_cuda_context);
+ DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
+ DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
+ /* Make sure all the CUDA functions are there if any of them are. */
+ if (optional_present && optional_present != optional_total)
+ {
+ err = "plugin missing OpenACC CUDA handler function";
+ goto out;
+ }
+ }
#undef DLSYM
+#undef DLSYM_OPT
- return true;
+ out:
+ if (err != NULL)
+ {
+ gomp_error ("while loading %s: %s", plugin_name, err);
+ if (last_missing)
+ gomp_error ("missing function was %s", last_missing);
+ if (plugin_handle)
+ dlclose (plugin_handle);
+ }
+ return err == NULL;
}
-/* This function finds OFFLOAD_IMAGES corresponding to DEVICE type, and
- registers them in the plugin. */
+/* This function adds a compatible offload image IMAGE to an accelerator device
+ DEVICE. DEVICE must be locked on entry, and remains locked on return. */
static void
-gomp_register_images_for_device (struct gomp_device_descr *device)
+gomp_register_image_for_device (struct gomp_device_descr *device,
+ struct offload_image_descr *image)
{
- int i;
- for (i = 0; i < num_offload_images; i++)
+ if (!device->offload_regions_registered
+ && (device->type == image->type
+ || device->type == OFFLOAD_TARGET_TYPE_HOST))
{
- struct offload_image_descr *image = &offload_images[i];
- if (image->type == device->type)
- device->register_image_func (image->host_table, image->target_data);
+ device->register_image_func (image->host_table, image->target_data);
+ device->offload_regions_registered = true;
}
}
/* This function initializes the runtime needed for offloading.
- It parses the list of offload targets and tries to load the plugins for these
- targets. Result of the function is properly initialized variable NUM_DEVICES
- and array DEVICES, containing descriptors for corresponding devices. */
+ It parses the list of offload targets and tries to load the plugins for
+ these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
+ will be set, and the array DEVICES initialized, containing descriptors for
+ corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
+ by the others. */
static void
gomp_target_init (void)
@@ -894,6 +1086,8 @@ gomp_target_init (void)
new_num_devices = current_device.get_num_devices_func ();
if (new_num_devices >= 1)
{
+ /* Augment DEVICES and NUM_DEVICES. */
+
devices = realloc (devices, (num_devices + new_num_devices)
* sizeof (struct gomp_device_descr));
if (!devices)
@@ -903,16 +1097,21 @@ gomp_target_init (void)
break;
}
+ current_device.name = current_device.get_name_func ();
+ /* current_device.capabilities has already been set. */
current_device.type = current_device.get_type_func ();
+ current_device.mem_map.is_initialized = false;
+ current_device.mem_map.splay_tree.root = NULL;
current_device.is_initialized = false;
- current_device.dev_splay_tree.root = NULL;
- gomp_register_images_for_device (&current_device);
+ current_device.offload_regions_registered = false;
+ current_device.openacc.data_environ = NULL;
+ current_device.openacc.target_data = NULL;
for (i = 0; i < new_num_devices; i++)
{
- current_device.id = num_devices + 1;
current_device.target_id = i;
devices[num_devices] = current_device;
- gomp_mutex_init (&devices[num_devices].dev_env_lock);
+ gomp_mutex_init (&devices[num_devices].mem_map.lock);
+ gomp_mutex_init (&devices[num_devices].lock);
num_devices++;
}
}
@@ -923,6 +1122,41 @@ gomp_target_init (void)
}
while (next);
+ /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
+ NUM_DEVICES_OPENMP. */
+ struct gomp_device_descr *devices_s
+ = malloc (num_devices * sizeof (struct gomp_device_descr));
+ if (!devices_s)
+ {
+ num_devices = 0;
+ free (devices);
+ devices = NULL;
+ }
+ num_devices_openmp = 0;
+ for (i = 0; i < num_devices; i++)
+ if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ devices_s[num_devices_openmp++] = devices[i];
+ int num_devices_after_openmp = num_devices_openmp;
+ for (i = 0; i < num_devices; i++)
+ if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ devices_s[num_devices_after_openmp++] = devices[i];
+ free (devices);
+ devices = devices_s;
+
+ for (i = 0; i < num_devices; i++)
+ {
+ int j;
+
+ for (j = 0; j < num_offload_images; j++)
+ gomp_register_image_for_device (&devices[i], &offload_images[j]);
+
+ /* The 'devices' array can be moved (by the realloc call) until we have
+ found all the plugins, so registering with the OpenACC runtime (which
+ takes a copy of the pointer argument) must be delayed until now. */
+ if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
+ goacc_register (&devices[i]);
+ }
+
free (offload_images);
offload_images = NULL;
num_offload_images = 0;
diff --git a/libgomp/testsuite/Makefile.am b/libgomp/testsuite/Makefile.am
index 9cc103a..66a9d94 100644
--- a/libgomp/testsuite/Makefile.am
+++ b/libgomp/testsuite/Makefile.am
@@ -12,7 +12,16 @@ _RUNTEST = $(shell if test -f $(top_srcdir)/../dejagnu/runtest; then \
echo $(top_srcdir)/../dejagnu/runtest; else echo runtest; fi)
RUNTEST = "$(_RUNTEST) $(AM_RUNTESTFLAGS)"
-# Used for support non-fallback offloading.
-export OFFLOAD_TARGETS = $(offload_targets)
-export OFFLOAD_ADDITIONAL_OPTIONS = $(offload_additional_options)
-export OFFLOAD_ADDITIONAL_LIB_PATHS = $(offload_additional_lib_paths)
+
+# Instead of directly in ../testsuite/libgomp-test-support.exp.in, the
+# following variables have to be "routed through" this Makefile, for expansion
+# of the several (Makefile) variables used therein.
+libgomp-test-support.exp: libgomp-test-support.pt.exp Makefile
+ cp $< $@.tmp
+ echo >> $@.tmp \
+ 'set offload_additional_options "$(offload_additional_options)"'
+ echo >> $@.tmp \
+ 'set offload_additional_lib_paths "$(offload_additional_lib_paths)"'
+ mv $@.tmp $@
+
+all-local: libgomp-test-support.exp
diff --git a/libgomp/testsuite/Makefile.in b/libgomp/testsuite/Makefile.in
index 2f845f0..352fc3f 100644
--- a/libgomp/testsuite/Makefile.in
+++ b/libgomp/testsuite/Makefile.in
@@ -35,7 +35,8 @@ build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
subdir = testsuite
-DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
+ $(srcdir)/libgomp-test-support.exp.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
$(top_srcdir)/../config/depstand.m4 \
@@ -49,12 +50,13 @@ am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
$(top_srcdir)/../config/tls.m4 $(top_srcdir)/../ltoptions.m4 \
$(top_srcdir)/../ltsugar.m4 $(top_srcdir)/../ltversion.m4 \
$(top_srcdir)/../lt~obsolete.m4 $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/../libtool.m4 $(top_srcdir)/configure.ac
+ $(top_srcdir)/../libtool.m4 $(top_srcdir)/plugin/configfrag.ac \
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs
CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_FILES = libgomp-test-support.pt.exp
CONFIG_CLEAN_VPATH_FILES =
SOURCES =
DEJATOOL = $(PACKAGE)
@@ -71,6 +73,8 @@ CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
+CUDA_DRIVER_INCLUDE = @CUDA_DRIVER_INCLUDE@
+CUDA_DRIVER_LIB = @CUDA_DRIVER_LIB@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
@@ -129,6 +133,10 @@ PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
+PLUGIN_NVPTX = @PLUGIN_NVPTX@
+PLUGIN_NVPTX_CPPFLAGS = @PLUGIN_NVPTX_CPPFLAGS@
+PLUGIN_NVPTX_LDFLAGS = @PLUGIN_NVPTX_LDFLAGS@
+PLUGIN_NVPTX_LIBS = @PLUGIN_NVPTX_LIBS@
RANLIB = @RANLIB@
SECTION_LDFLAGS = @SECTION_LDFLAGS@
SED = @SED@
@@ -250,6 +258,8 @@ $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
+libgomp-test-support.pt.exp: $(top_builddir)/config.status $(srcdir)/libgomp-test-support.exp.in
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
mostlyclean-libtool:
-rm -f *.lo
@@ -303,7 +313,7 @@ distclean-DEJAGNU:
check-am: all-am
$(MAKE) $(AM_MAKEFLAGS) check-DEJAGNU
check: check-am
-all-am: Makefile
+all-am: Makefile all-local
installdirs:
install: install-am
install-exec: install-exec-am
@@ -398,23 +408,31 @@ uninstall-am:
.MAKE: check-am install-am install-strip
-.PHONY: all all-am check check-DEJAGNU check-am clean clean-generic \
- clean-libtool distclean distclean-DEJAGNU distclean-generic \
- distclean-libtool dvi dvi-am html html-am info info-am install \
- install-am install-data install-data-am install-dvi \
- install-dvi-am install-exec install-exec-am install-html \
- install-html-am install-info install-info-am install-man \
- install-pdf install-pdf-am install-ps install-ps-am \
- install-strip installcheck installcheck-am installdirs \
- maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
- uninstall uninstall-am
-
-
-# Used for support non-fallback offloading.
-export OFFLOAD_TARGETS = $(offload_targets)
-export OFFLOAD_ADDITIONAL_OPTIONS = $(offload_additional_options)
-export OFFLOAD_ADDITIONAL_LIB_PATHS = $(offload_additional_lib_paths)
+.PHONY: all all-am all-local check check-DEJAGNU check-am clean \
+ clean-generic clean-libtool distclean distclean-DEJAGNU \
+ distclean-generic distclean-libtool dvi dvi-am html html-am \
+ info info-am install install-am install-data install-data-am \
+ install-dvi install-dvi-am install-exec install-exec-am \
+ install-html install-html-am install-info install-info-am \
+ install-man install-pdf install-pdf-am install-ps \
+ install-ps-am install-strip installcheck installcheck-am \
+ installdirs maintainer-clean maintainer-clean-generic \
+ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
+ ps ps-am uninstall uninstall-am
+
+
+# Instead of directly in ../testsuite/libgomp-test-support.exp.in, the
+# following variables have to be "routed through" this Makefile, for expansion
+# of the several (Makefile) variables used therein.
+libgomp-test-support.exp: libgomp-test-support.pt.exp Makefile
+ cp $< $@.tmp
+ echo >> $@.tmp \
+ 'set offload_additional_options "$(offload_additional_options)"'
+ echo >> $@.tmp \
+ 'set offload_additional_lib_paths "$(offload_additional_lib_paths)"'
+ mv $@.tmp $@
+
+all-local: libgomp-test-support.exp
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp
index 2d6f822..5a6eec1 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -32,6 +32,29 @@ load_gcc_lib timeout-dg.exp
load_gcc_lib torture-options.exp
load_gcc_lib fortran-modules.exp
+# Load the test support file generated while configuring and building libgomp.
+load_file libgomp-test-support.exp
+
+# Populate offload_targets_s (the comma-separated offload_targets as a list),
+# and offload_targets_s_openacc (the same, but with OpenACC names, as OpenACC
+# spells some of them differently; intelmic is left out, host is appended).
+set offload_targets_s [split $offload_targets ","]
+set offload_targets_s_openacc {}
+foreach offload_target_openacc $offload_targets_s {
+ switch $offload_target_openacc {
+ intelmic {
+ # Skip; the tests would all FAIL because of missing
+ # GOMP_OFFLOAD_CAP_OPENACC_200.
+ continue
+ }
+ nvptx {
+ set offload_target_openacc "nvidia"
+ }
+ }
+ lappend offload_targets_s_openacc "$offload_target_openacc"
+}
+lappend offload_targets_s_openacc "host"
+
set dg-do-what-default run
#
@@ -108,13 +131,9 @@ proc libgomp_init { args } {
# Compute what needs to be put into LD_LIBRARY_PATH
set always_ld_library_path ".:${blddir}/.libs"
- # Get offload-related variables from environment (exported by Makefile)
- set offload_targets [getenv OFFLOAD_TARGETS]
- set offload_additional_options [getenv OFFLOAD_ADDITIONAL_OPTIONS]
- set offload_additional_lib_paths [getenv OFFLOAD_ADDITIONAL_LIB_PATHS]
-
# Add liboffloadmic build directory in LD_LIBRARY_PATH to support
# non-fallback testing for Intel MIC targets
+ global offload_targets
if { [string match "*,intelmic,*" ",$offload_targets,"] } {
append always_ld_library_path ":${blddir}/../liboffloadmic/.libs"
append always_ld_library_path ":${blddir}/../liboffloadmic/plugin/.libs"
@@ -122,6 +141,7 @@ proc libgomp_init { args } {
append always_ld_library_path ":${blddir}/../libstdc++-v3/src/.libs"
}
+ global offload_additional_lib_paths
if { $offload_additional_lib_paths != "" } {
append always_ld_library_path "${offload_additional_lib_paths}"
}
@@ -158,9 +178,29 @@ proc libgomp_init { args } {
lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/.libs"
lappend ALWAYS_CFLAGS "additional_flags=-I${blddir}"
lappend ALWAYS_CFLAGS "ldflags=-L${blddir}/.libs"
+ # The top-level include directory, for gomp-constants.h.
+ lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/../../include"
}
lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/.."
+ # For build-tree testing, also consider the CUDA paths used for building.
+ # For installed testing, we assume all that to be provided in the sysroot.
+ if { $blddir != "" } {
+ global cuda_driver_include
+ global cuda_driver_lib
+ if { $cuda_driver_include != "" } {
+ # Stop gfortran from freaking out:
+ # Warning: Nonexistent include directory "[...]"
+ if {[file exists $cuda_driver_include]} {
+ lappend ALWAYS_CFLAGS "additional_flags=-I$cuda_driver_include"
+ }
+ }
+ if { $cuda_driver_lib != "" } {
+ lappend ALWAYS_CFLAGS "additional_flags=-L$cuda_driver_lib"
+ append always_ld_library_path ":$cuda_driver_lib"
+ }
+ }
+
# We use atomic operations in the testcases to validate results.
if { ([istarget i?86-*-*] || [istarget x86_64-*-*])
&& [check_effective_target_ia32] } {
@@ -191,6 +231,7 @@ proc libgomp_init { args } {
# Used for support non-fallback offloading.
# Help GCC to find target mkoffload.
+ global offload_additional_options
if { $offload_additional_options != "" } {
lappend ALWAYS_CFLAGS "additional_flags=${offload_additional_options}"
}
@@ -278,3 +319,29 @@ proc check_effective_target_offload_device { } {
}
} ]
}
+
+# Return 1 if acc_get_num_devices reports at least one nvidia board.
+
+proc check_effective_target_openacc_nvidia_accel_present { } {
+ return [check_runtime openacc_nvidia_accel_present {
+ #include <openacc.h>
+ int main () {
+ return !(acc_get_num_devices (acc_device_nvidia) > 0);
+ }
+ } "" ]
+}
+
+# Return 1 if at least one nvidia board is present, and nvidia is the device
+# type currently under test: the driver's offload_target_openacc, from which it
+# sets ACC_DEVICE_TYPE.  Return 0 if that variable does not exist.
+
+proc check_effective_target_openacc_nvidia_accel_selected { } {
+ if { ![check_effective_target_openacc_nvidia_accel_present] } {
+ return 0;
+ }
+ global offload_target_openacc
+ if { [info exists offload_target_openacc] && $offload_target_openacc == "nvidia" } {
+ return 1;
+ }
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp-test-support.exp.in b/libgomp/testsuite/libgomp-test-support.exp.in
new file mode 100644
index 0000000..764bec0
--- /dev/null
+++ b/libgomp/testsuite/libgomp-test-support.exp.in
@@ -0,0 +1,4 @@
+set cuda_driver_include "@CUDA_DRIVER_INCLUDE@"
+set cuda_driver_lib "@CUDA_DRIVER_LIB@"
+
+set offload_targets "@offload_targets@"
diff --git a/libgomp/testsuite/libgomp.oacc-c++/c++.exp b/libgomp/testsuite/libgomp.oacc-c++/c++.exp
new file mode 100644
index 0000000..f486f9b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c++/c++.exp
@@ -0,0 +1,107 @@
+# This whole file adapted from libgomp.c++/c++.exp.
+
+load_lib libgomp-dg.exp
+load_gcc_lib gcc-dg.exp
+
+global shlib_ext
+
+set shlib_ext [get_shlib_extension]
+set lang_link_flags "-lstdc++"
+set lang_test_file_found 0
+set lang_library_path "../libstdc++-v3/src/.libs"
+if [info exists lang_include_flags] then {
+ unset lang_include_flags
+}
+
+# Initialize dg.
+dg-init
+
+# Turn on OpenACC.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc"
+
+# Switch into C++ mode. Otherwise, the libgomp.oacc-c-c++-common/*.c
+# files would be compiled as C files.
+set SAVE_GCC_UNDER_TEST "$GCC_UNDER_TEST"
+set GCC_UNDER_TEST "$GCC_UNDER_TEST -x c++"
+
+set blddir [lookfor_file [get_multilibs] libgomp]
+
+
+if { $blddir != "" } {
+ # Look for a static libstdc++ first.
+ if [file exists "${blddir}/${lang_library_path}/libstdc++.a"] {
+ set lang_test_file "${lang_library_path}/libstdc++.a"
+ set lang_test_file_found 1
+ # We may have a shared only build, so look for a shared libstdc++.
+ } elseif [file exists "${blddir}/${lang_library_path}/libstdc++.${shlib_ext}"] {
+ set lang_test_file "${lang_library_path}/libstdc++.${shlib_ext}"
+ set lang_test_file_found 1
+ } else {
+ puts "No libstdc++ library found, will not execute c++ tests"
+ }
+} elseif { [info exists GXX_UNDER_TEST] } {
+ set lang_test_file_found 1
+ # Needs to exist for libgomp.exp.
+ set lang_test_file ""
+} else {
+ puts "GXX_UNDER_TEST not defined, will not execute c++ tests"
+}
+
+if { $lang_test_file_found } {
+ # Gather a list of all tests.
+ set tests [lsort [concat \
+ [find $srcdir/$subdir *.C] \
+ [find $srcdir/$subdir/../libgomp.oacc-c-c++-common *.c]]]
+
+ if { $blddir != "" } {
+ set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}"
+ } else {
+ set ld_library_path "$always_ld_library_path"
+ }
+ append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST]
+ set_ld_library_path_env_vars
+
+ set flags_file "${blddir}/../libstdc++-v3/scripts/testsuite_flags"
+ if { [file exists $flags_file] } {
+ set libstdcxx_includes [exec sh $flags_file --build-includes]
+ } else {
+ set libstdcxx_includes ""
+ }
+
+ # Test OpenACC with available accelerators.
+ foreach offload_target_openacc $offload_targets_s_openacc {
+ set tagopt "-DACC_DEVICE_TYPE_$offload_target_openacc=1"
+
+ switch $offload_target_openacc {
+ host {
+ set acc_mem_shared 1
+ }
+ host_nonshm {
+ set acc_mem_shared 0
+ }
+ nvidia {
+ # Copy ptx file (TEMPORARY)
+ remote_download host $srcdir/libgomp.oacc-c-c++-common/subr.ptx
+
+ # Where timer.h lives
+ lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/libgomp.oacc-c-c++-common"
+
+ set acc_mem_shared 0
+ }
+ default {
+ set acc_mem_shared 0
+ }
+ }
+ set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared"
+
+ setenv ACC_DEVICE_TYPE $offload_target_openacc
+
+ dg-runtest $tests "$tagopt" $libstdcxx_includes
+ }
+}
+
+# See above.
+set GCC_UNDER_TEST "$SAVE_GCC_UNDER_TEST"
+
+# All done.
+dg-finish
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-1.c
new file mode 100644
index 0000000..f88b9e3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-1.c
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+/* { dg-shouldfail "" { *-*-* } { "*" } { "" } } */
+
+#include <stdlib.h>
+
+int
+main (void)
+{
+ /* The parallel region aborts unconditionally; the run is expected to fail.  */
+#pragma acc parallel
+ {
+ abort ();
+ }
+
+ return 0;
+}
+
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-2.c
new file mode 100644
index 0000000..debb81e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-2.c
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main (int argc, char **argv)
+{
+ /* abort is only reached if argc != 1; presumably run without arguments.  */
+#pragma acc parallel
+ {
+ if (argc != 1)
+ abort ();
+ }
+
+ return 0;
+}
+
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-3.c
new file mode 100644
index 0000000..be7aaa8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-3.c
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+/* { dg-shouldfail "" { *-*-* } { "*" } { "" } } */
+
+#include <stdlib.h>
+
+int
+main (void)
+{
+ /* The kernels region aborts unconditionally; the run is expected to fail.  */
+#pragma acc kernels
+ {
+ abort ();
+ }
+
+ return 0;
+}
+
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-4.c
new file mode 100644
index 0000000..c29ca3f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/abort-4.c
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main (int argc, char **argv)
+{
+ /* abort is only reached if argc != 1; presumably run without arguments.  */
+#pragma acc kernels
+ {
+ if (argc != 1)
+ abort ();
+ }
+
+ return 0;
+}
+
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c
new file mode 100644
index 0000000..81ea476
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c
@@ -0,0 +1,75 @@
+/* Disable the acc_on_device builtin; we want to test the libgomp library
+ function. */
+/* { dg-additional-options "-fno-builtin-acc_on_device" } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char *argv[])
+{
+ /* Host. */
+
+ {
+ if (!acc_on_device (acc_device_none))
+ abort ();
+ if (!acc_on_device (acc_device_host))
+ abort ();
+ if (acc_on_device (acc_device_host_nonshm))
+ abort ();
+ if (acc_on_device (acc_device_not_host))
+ abort ();
+ if (acc_on_device (acc_device_nvidia))
+ abort ();
+ }
+
+
+ /* Host via offloading fallback mode. */
+
+#pragma acc parallel if(0)
+ {
+ if (!acc_on_device (acc_device_none))
+ abort ();
+ if (!acc_on_device (acc_device_host))
+ abort ();
+ if (acc_on_device (acc_device_host_nonshm))
+ abort ();
+ if (acc_on_device (acc_device_not_host))
+ abort ();
+ if (acc_on_device (acc_device_nvidia))
+ abort ();
+ }
+
+
+#if !ACC_DEVICE_TYPE_host
+
+ /* Offloaded. */
+
+#pragma acc parallel
+ {
+ if (acc_on_device (acc_device_none))
+ abort ();
+ if (acc_on_device (acc_device_host))
+ abort ();
+#if ACC_DEVICE_TYPE_host_nonshm
+ if (!acc_on_device (acc_device_host_nonshm))
+ abort ();
+#else
+ if (acc_on_device (acc_device_host_nonshm))
+ abort ();
+#endif
+ if (!acc_on_device (acc_device_not_host))
+ abort ();
+#if ACC_DEVICE_TYPE_nvidia
+ if (!acc_on_device (acc_device_nvidia))
+ abort ();
+#else
+ if (acc_on_device (acc_device_nvidia))
+ abort ();
+#endif
+ }
+
+#endif
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c
new file mode 100644
index 0000000..22cef6d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c
@@ -0,0 +1,466 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <openacc.h>
+#include <stdlib.h>
+#include "cuda.h"
+
+#include <stdio.h>
+#include <sys/time.h>
+
+int
+main (int argc, char **argv)
+{
+ CUresult r;
+ CUstream stream1;
+ int N = 128; //1024 * 1024;
+ float *a, *b, *c, *d, *e;
+ int i;
+ int nbytes;
+
+ acc_init (acc_device_nvidia);
+
+ nbytes = N * sizeof (float);
+
+ a = (float *) malloc (nbytes);
+ b = (float *) malloc (nbytes);
+ c = (float *) malloc (nbytes);
+ d = (float *) malloc (nbytes);
+ e = (float *) malloc (nbytes);
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc wait
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 3.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort ();
+
+ if (b[i] != 2.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 9.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc parallel wait (1) async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort ();
+
+ if (b[i] != 4.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+
+ if (e[i] != 11.0)
+ abort ();
+ }
+
+
+ r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ acc_set_cuda_stream (1, stream1);
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 5.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 7.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 7.0)
+ abort ();
+
+ if (b[i] != 49.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc parallel wait (1) async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 9.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+
+ if (e[i] != 17.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort ();
+
+ if (b[i] != 16.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+ }
+
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+ {
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc parallel async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 25.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/cache-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/cache-1.c
new file mode 100644
index 0000000..3f1f0bb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/cache-1.c
@@ -0,0 +1,48 @@
+int
+main (int argc, char **argv)
+{
+#define N 2
+ int a[N], b[N];
+ int i;
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3;
+ b[i] = 0;
+ }
+
+#pragma acc parallel copyin (a[0:N]) copyout (b[0:N])
+{
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ const int idx = ii;
+ int n = 1;
+ const int len = n;
+
+#pragma acc cache (a[0:N])
+
+#pragma acc cache (a[0:N], b[0:N])
+
+#pragma acc cache (a[0])
+
+#pragma acc cache (a[0], a[1], b[0:N])
+
+#pragma acc cache (a[idx])
+
+#pragma acc cache (a[idx:len])
+
+ b[ii] = a[ii];
+ }
+}
+
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != b[i])
+ __builtin_abort ();
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c
new file mode 100644
index 0000000..51c0cf5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c
@@ -0,0 +1,623 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main (int argc, char **argv)
+{
+ int N = 8;
+ float *a, *b, *c, *d;
+ int i;
+
+ a = (float *) malloc (N * sizeof (float));
+ b = (float *) malloc (N * sizeof (float));
+ c = (float *) malloc (N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc parallel copyin (a[0:N]) copyout (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 3.0)
+ abort ();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 1.0;
+ }
+
+#pragma acc parallel copyin (a[0:N]) copyout (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 5.0)
+ abort ();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+ d = (float *) acc_copyin (&a[0], N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 9.0;
+ }
+
+#pragma acc parallel present_or_copyin (a[0:N]) copyout (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 6.0)
+ abort ();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ acc_free (d);
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc parallel copyin (a[0:N]) present_or_copyout (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 6.0)
+ abort ();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 2.0;
+ }
+
+ d = (float *) acc_copyin (&b[0], N * sizeof (float));
+
+#pragma acc parallel copyin (a[0:N]) present_or_copyout (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 2.0)
+ abort ();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ acc_free (d);
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 4.0;
+ }
+
+#pragma acc parallel copy (a[0:N]) copyout (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ a[ii] = a[ii] + 1;
+ b[ii] = a[ii] + 2;
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort ();
+
+ if (b[i] != 6.0)
+ abort ();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 7.0;
+ }
+
+#pragma acc parallel present_or_copy (a[0:N]) present_or_copy (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ a[ii] = a[ii] + 1;
+ b[ii] = b[ii] + 2;
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 9.0)
+ abort ();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 7.0;
+ }
+
+ d = (float *) acc_copyin (&a[0], N * sizeof (float));
+ d = (float *) acc_copyin (&b[0], N * sizeof (float));
+
+#pragma acc parallel present_or_copy (a[0:N]) present_or_copy (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ a[ii] = a[ii] + 1;
+ b[ii] = b[ii] + 2;
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 7.0)
+ abort ();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ d = (float *) acc_deviceptr (&a[0]);
+ acc_unmap_data (&a[0]);
+ acc_free (d);
+
+ d = (float *) acc_deviceptr (&b[0]);
+ acc_unmap_data (&b[0]);
+ acc_free (d);
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 7.0;
+ }
+
+#pragma acc parallel copyin (a[0:N]) create (c[0:N]) copyout (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 3.0)
+ abort ();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&c[0], (N * sizeof (float))))
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 8.0;
+ }
+
+#pragma acc parallel copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort ();
+
+ if (b[i] != 4.0)
+ abort ();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&c[0], (N * sizeof (float))))
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 5.0;
+ }
+
+ d = (float *) acc_malloc (N * sizeof (float));
+ acc_map_data (c, d, N * sizeof (float));
+
+#pragma acc parallel copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort ();
+
+ if (b[i] != 2.0)
+ abort ();
+ }
+
+ if (acc_is_present (a, (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (b, (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (c, (N * sizeof (float))))
+ abort ();
+
+ d = (float *) acc_deviceptr (c);
+
+ acc_unmap_data (c);
+
+ acc_free (d);
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 8.0;
+ }
+
+ d = (float *) acc_malloc (N * sizeof (float));
+ acc_map_data (c, d, N * sizeof (float));
+
+#pragma acc parallel copyin (a[0:N]) present (c[0:N]) copyout (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort ();
+
+ if (b[i] != 4.0)
+ abort ();
+ }
+
+ if (acc_is_present (a, (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (b, (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (c, (N * sizeof (float))))
+ abort ();
+
+ acc_unmap_data (c);
+
+ acc_free (d);
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 8.0;
+ }
+
+ acc_copyin (a, N * sizeof (float));
+
+ d = (float *) acc_malloc (N * sizeof (float));
+ acc_map_data (b, d, N * sizeof (float));
+
+ d = (float *) acc_malloc (N * sizeof (float));
+ acc_map_data (c, d, N * sizeof (float));
+
+#pragma acc parallel present (a[0:N]) present (c[0:N]) present (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+
+ if (!acc_is_present (a, (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (b, (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (c, (N * sizeof (float))))
+ abort ();
+
+ acc_copyout (b, N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort ();
+
+ if (b[i] != 4.0)
+ abort ();
+ }
+
+ d = (float *) acc_deviceptr (a);
+
+ acc_unmap_data (a);
+
+ acc_free (d);
+
+ d = (float *) acc_deviceptr (c);
+
+ acc_unmap_data (c);
+
+ acc_free (d);
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 6.0;
+ }
+
+ d = (float *) acc_malloc (N * sizeof (float));
+
+#pragma acc parallel copyin (a[0:N]) deviceptr (d) copyout (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ d[ii] = a[ii];
+ b[ii] = d[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 3.0)
+ abort ();
+ }
+
+ if (acc_is_present (a, (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (b, (N * sizeof (float))))
+ abort ();
+
+ acc_free (d);
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+ d = (float *) acc_copyin (&a[0], N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 9.0;
+ }
+
+#pragma acc parallel pcopyin (a[0:N]) copyout (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 6.0)
+ abort ();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ acc_free (d);
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc parallel copyin (a[0:N]) pcopyout (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 6.0)
+ abort ();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 7.0;
+ }
+
+#pragma acc parallel copyin (a[0:N]) pcreate (c[0:N]) copyout (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 5.0)
+ abort ();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&c[0], (N * sizeof (float))))
+ abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-2.c
new file mode 100644
index 0000000..8dc45cb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-2.c
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main (int argc, char **argv)
+{
+ int N = 8;
+ float *a, *b, *c, *d;
+ int i;
+
+ a = (float *) malloc (N * sizeof (float));
+ b = (float *) malloc (N * sizeof (float));
+ c = (float *) malloc (N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 5.0;
+ }
+
+ d = (float *) acc_malloc (N * sizeof (float));
+ acc_map_data (c, d, N * sizeof (float));
+
+#pragma acc parallel copyin (a[0:N]) present_or_create (c[0:N+1]) copyout (b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ c[ii] = a[ii];
+ b[ii] = c[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort ();
+
+ if (b[i] != 2.0)
+ abort ();
+ }
+
+ if (acc_is_present (a, (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (b, (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (c, (N * sizeof (float))))
+ abort ();
+
+ d = (float *) acc_deviceptr (c);
+
+ acc_unmap_data (c);
+
+ acc_free (d);
+
+ return 0;
+}
+/* { dg-shouldfail "libgomp: \[\h+,\d+\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-1.c
new file mode 100644
index 0000000..80fed6c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-1.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+
+int
+main (void)
+{
+ int i, j, k, l = 0;
+ int a[3][3][3];
+
+ memset (a, '\0', sizeof (a));
+ #pragma acc parallel
+ #pragma acc loop collapse(4 - 1)
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 2; k++)
+ a[i][j][k] = i + j * 4 + k * 16;
+ #pragma acc parallel
+ {
+ #pragma acc loop collapse(2) reduction(|:l)
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 2; k++)
+ if (a[i][j][k] != i + j * 4 + k * 16)
+ l = 1;
+ }
+ if (l)
+ abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c
new file mode 100644
index 0000000..44a77f7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main (void)
+{
+ int i, j, k, l = 0, f = 0, x = 0;
+ int m1 = 4, m2 = -5, m3 = 17;
+
+ #pragma acc parallel
+ #pragma acc loop collapse(3) reduction(+:l)
+ for (i = -2; i < m1; i++)
+ for (j = m2; j < -2; j++)
+ {
+ for (k = 13; k < m3; k++)
+ {
+ if ((i + 2) * 12 + (j + 5) * 4 + (k - 13) != 9 + f++)
+ l++;
+ }
+ }
+
+ for (i = -2; i < m1; i++)
+ for (j = m2; j < -2; j++)
+ {
+ for (k = 13; k < m3; k++)
+ {
+ if ((i + 2) * 12 + (j + 5) * 4 + (k - 13) != 9 + f++)
+ x++;
+ }
+ }
+
+ if (l != x)
+ abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c
new file mode 100644
index 0000000..a5be728
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-3.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+int
+main (void)
+{
+ int i2, l = 0, r = 0;
+ int a[3][3][3];
+
+ memset (a, '\0', sizeof (a));
+ #pragma acc parallel
+ #pragma acc loop collapse(4 - 1)
+ for (int i = 0; i < 2; i++)
+ for (int j = 0; j < 2; j++)
+ for (int k = 0; k < 2; k++)
+ a[i][j][k] = i + j * 4 + k * 16;
+#pragma acc parallel
+ {
+ #pragma acc loop collapse(2) reduction(|:l)
+ for (i2 = 0; i2 < 2; i2++)
+ for (int j = 0; j < 2; j++)
+ for (int k = 0; k < 2; k++)
+ if (a[i2][j][k] != i2 + j * 4 + k * 16)
+ l += 1;
+ }
+
+ for (i2 = 0; i2 < 2; i2++)
+ for (int j = 0; j < 2; j++)
+ for (int k = 0; k < 2; k++)
+ if (a[i2][j][k] != i2 + j * 4 + k * 16)
+ r += 1;
+
+ if (l != r)
+ abort ();
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-4.c
new file mode 100644
index 0000000..52dd435
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-4.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+
+#include <string.h>
+
+int
+main (void)
+{
+ int l = 0;
+ int b[3][3];
+ int i, j;
+
+ memset (b, '\0', sizeof (b));
+
+#pragma acc parallel copy(b[0:3][0:3]) copy(l)
+ {
+#pragma acc loop collapse(2) reduction(+:l)
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ if (b[i][j] != 16)
+ l += 1;
+ }
+
+ if (l != 2 * 2)
+ __builtin_abort();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-1.c
new file mode 100644
index 0000000..dabc706
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-1.c
@@ -0,0 +1,213 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy (int n, float a, float *x, float *y)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ {
+ y[i] = a * x[i] + y[i];
+ }
+}
+
+void
+context_check (CUcontext ctx1)
+{
+ CUcontext ctx2, ctx3;
+ CUresult r;
+
+ r = cuCtxGetCurrent (&ctx2);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ if (ctx1 != ctx2)
+ {
+ fprintf (stderr, "new context established\n");
+ exit (EXIT_FAILURE);
+ }
+
+ ctx3 = (CUcontext) acc_get_current_cuda_context ();
+
+ if (ctx1 != ctx3)
+ {
+ fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
+ exit (EXIT_FAILURE);
+ }
+
+ return;
+}
+
+int
+main (int argc, char **argv)
+{
+ cublasStatus_t s;
+ cudaError_t e;
+ cublasHandle_t h;
+ CUcontext pctx, ctx;
+ CUresult r;
+ int dev;
+ int i;
+ const int N = 256;
+ float *h_X, *h_Y1, *h_Y2;
+ float *d_X,*d_Y;
+ float alpha = 2.0f;
+ float error_norm;
+ float ref_norm;
+
+ /* Test 1 - cuBLAS creates, OpenACC shares. */
+
+ s = cublasCreate (&h);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasCreate failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ r = cuCtxGetCurrent (&pctx);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ e = cudaGetDevice (&dev);
+ if (e != cudaSuccess)
+ {
+ fprintf (stderr, "cudaGetDevice failed: %d\n", e);
+ exit (EXIT_FAILURE);
+ }
+
+ acc_set_device_num (dev, acc_device_nvidia);
+
+ h_X = (float *) malloc (N * sizeof (float));
+ if (!h_X)
+ {
+ fprintf (stderr, "malloc failed: for h_X\n");
+ exit (EXIT_FAILURE);
+ }
+
+ h_Y1 = (float *) malloc (N * sizeof (float));
+ if (!h_Y1)
+ {
+ fprintf (stderr, "malloc failed: for h_Y1\n");
+ exit (EXIT_FAILURE);
+ }
+
+ h_Y2 = (float *) malloc (N * sizeof (float));
+ if (!h_Y2)
+ {
+ fprintf (stderr, "malloc failed: for h_Y2\n");
+ exit (EXIT_FAILURE);
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ h_X[i] = rand () / (float) RAND_MAX;
+ h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
+ }
+
+ d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
+ if (d_X == NULL)
+ {
+ fprintf (stderr, "copyin error h_X\n");
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
+ if (d_Y == NULL)
+ {
+ fprintf (stderr, "copyin error h_Y1\n");
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasSaxpy failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
+
+ context_check (pctx);
+
+ saxpy (N, alpha, h_X, h_Y2);
+
+ error_norm = 0;
+ ref_norm = 0;
+
+ for (i = 0; i < N; ++i)
+ {
+ float diff;
+
+ diff = h_Y1[i] - h_Y2[i];
+ error_norm += diff * diff;
+ ref_norm += h_Y2[i] * h_Y2[i];
+ }
+
+ error_norm = (float) sqrt ((double) error_norm);
+ ref_norm = (float) sqrt ((double) ref_norm);
+
+ if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+ {
+ fprintf (stderr, "math error\n");
+ exit (EXIT_FAILURE);
+ }
+
+ free (h_X);
+ free (h_Y1);
+ free (h_Y2);
+
+ acc_free (d_X);
+ acc_free (d_Y);
+
+ context_check (pctx);
+
+ s = cublasDestroy (h);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasDestroy failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ r = cuCtxGetCurrent (&ctx);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ if (!ctx)
+ {
+ fprintf (stderr, "Expected context\n");
+ exit (EXIT_FAILURE);
+ }
+
+ if (pctx != ctx)
+ {
+ fprintf (stderr, "Unexpected new context\n");
+ exit (EXIT_FAILURE);
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c
new file mode 100644
index 0000000..6a52f74
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c
@@ -0,0 +1,223 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy (int n, float a, float *x, float *y)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ {
+ y[i] = a * x[i] + y[i];
+ }
+}
+
+void
+context_check (CUcontext ctx1)
+{
+ CUcontext ctx2, ctx3;
+ CUresult r;
+
+ r = cuCtxGetCurrent (&ctx2);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ if (ctx1 != ctx2)
+ {
+ fprintf (stderr, "new context established\n");
+ exit (EXIT_FAILURE);
+ }
+
+ ctx3 = (CUcontext) acc_get_current_cuda_context ();
+
+ if (ctx1 != ctx3)
+ {
+ fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
+ exit (EXIT_FAILURE);
+ }
+
+ return;
+}
+
+int
+main (int argc, char **argv)
+{
+ cublasStatus_t s;
+ cudaError_t e;
+ cublasHandle_t h;
+ CUcontext pctx, ctx;
+ CUresult r;
+ int dev;
+ int i;
+ const int N = 256;
+ float *h_X, *h_Y1, *h_Y2;
+ float *d_X,*d_Y;
+ float alpha = 2.0f;
+ float error_norm;
+ float ref_norm;
+
+ /* Test 2 - cuBLAS creates, OpenACC shares. */
+
+ s = cublasCreate (&h);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasCreate failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ r = cuCtxGetCurrent (&pctx);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ e = cudaGetDevice (&dev);
+ if (e != cudaSuccess)
+ {
+ fprintf (stderr, "cudaGetDevice failed: %d\n", e);
+ exit (EXIT_FAILURE);
+ }
+
+ acc_set_device_num (dev, acc_device_nvidia);
+
+ h_X = (float *) malloc (N * sizeof (float));
+ if (h_X == 0)
+ {
+ fprintf (stderr, "malloc failed: for h_X\n");
+ exit (EXIT_FAILURE);
+ }
+
+ h_Y1 = (float *) malloc (N * sizeof (float));
+ if (h_Y1 == 0)
+ {
+ fprintf (stderr, "malloc failed: for h_Y1\n");
+ exit (EXIT_FAILURE);
+ }
+
+ h_Y2 = (float *) malloc (N * sizeof (float));
+ if (h_Y2 == 0)
+ {
+ fprintf (stderr, "malloc failed: for h_Y2\n");
+ exit (EXIT_FAILURE);
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ h_X[i] = rand () / (float) RAND_MAX;
+ h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
+ }
+
+ d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
+ if (d_X == NULL)
+ {
+ fprintf (stderr, "copyin error h_X\n");
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
+ if (d_Y == NULL)
+ {
+ fprintf (stderr, "copyin error h_Y1\n");
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasSaxpy failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
+
+ context_check (pctx);
+
+#pragma acc parallel present (h_X[0:N]), copy (h_Y2[0:N]) copyin (alpha)
+ {
+ int i;
+
+ for (i = 0; i < N; i++)
+ {
+ h_Y2[i] = alpha * h_X[i] + h_Y2[i];
+ }
+ }
+
+ context_check (pctx);
+
+ error_norm = 0;
+ ref_norm = 0;
+
+ for (i = 0; i < N; ++i)
+ {
+ float diff;
+
+ diff = h_Y1[i] - h_Y2[i];
+ error_norm += diff * diff;
+ ref_norm += h_Y2[i] * h_Y2[i];
+ }
+
+ error_norm = (float) sqrt ((double) error_norm);
+ ref_norm = (float) sqrt ((double) ref_norm);
+
+ if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+ {
+ fprintf (stderr, "math error\n");
+ exit (EXIT_FAILURE);
+ }
+
+ free (h_X);
+ free (h_Y1);
+ free (h_Y2);
+
+ acc_free (d_X);
+ acc_free (d_Y);
+
+ context_check (pctx);
+
+ s = cublasDestroy (h);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasDestroy failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ r = cuCtxGetCurrent (&ctx);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ if (!ctx)
+ {
+ fprintf (stderr, "Expected context\n");
+ exit (EXIT_FAILURE);
+ }
+
+ if (pctx != ctx)
+ {
+ fprintf (stderr, "Unexpected new context\n");
+ exit (EXIT_FAILURE);
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-3.c
new file mode 100644
index 0000000..ccd276c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-3.c
@@ -0,0 +1,200 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy (int n, float a, float *x, float *y)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ {
+ y[i] = a * x[i] + y[i];
+ }
+}
+
+void
+context_check (CUcontext ctx1)
+{
+ CUcontext ctx2, ctx3;
+ CUresult r;
+
+ r = cuCtxGetCurrent (&ctx2);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ if (ctx1 != ctx2)
+ {
+ fprintf (stderr, "new context established\n");
+ exit (EXIT_FAILURE);
+ }
+
+ ctx3 = (CUcontext) acc_get_current_cuda_context ();
+
+ if (ctx1 != ctx3)
+ {
+ fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
+ exit (EXIT_FAILURE);
+ }
+
+ return;
+}
+
+int
+main (int argc, char **argv)
+{
+ cublasStatus_t s;
+ cublasHandle_t h;
+ CUcontext pctx;
+ CUresult r;
+ int i;
+ const int N = 256;
+ float *h_X, *h_Y1, *h_Y2;
+ float *d_X,*d_Y;
+ float alpha = 2.0f;
+ float error_norm;
+ float ref_norm;
+
+ /* Test 3 - OpenACC creates, cuBLAS shares. */
+
+ acc_set_device_num (0, acc_device_nvidia);
+
+ r = cuCtxGetCurrent (&pctx);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ h_X = (float *) malloc (N * sizeof (float));
+ if (h_X == 0)
+ {
+ fprintf (stderr, "malloc failed: for h_X\n");
+ exit (EXIT_FAILURE);
+ }
+
+ h_Y1 = (float *) malloc (N * sizeof (float));
+ if (h_Y1 == 0)
+ {
+ fprintf (stderr, "malloc failed: for h_Y1\n");
+ exit (EXIT_FAILURE);
+ }
+
+ h_Y2 = (float *) malloc (N * sizeof (float));
+ if (h_Y2 == 0)
+ {
+ fprintf (stderr, "malloc failed: for h_Y2\n");
+ exit (EXIT_FAILURE);
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ h_X[i] = rand () / (float) RAND_MAX;
+ h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
+ }
+
+ d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
+ if (d_X == NULL)
+ {
+ fprintf (stderr, "copyin error h_X\n");
+ exit (EXIT_FAILURE);
+ }
+
+ d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
+ if (d_Y == NULL)
+ {
+ fprintf (stderr, "copyin error h_Y1\n");
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ s = cublasCreate (&h);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasCreate failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasSaxpy failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
+
+ context_check (pctx);
+
+ saxpy (N, alpha, h_X, h_Y2);
+
+ error_norm = 0;
+ ref_norm = 0;
+
+ for (i = 0; i < N; ++i)
+ {
+ float diff;
+
+ diff = h_Y1[i] - h_Y2[i];
+ error_norm += diff * diff;
+ ref_norm += h_Y2[i] * h_Y2[i];
+ }
+
+ error_norm = (float) sqrt ((double) error_norm);
+ ref_norm = (float) sqrt ((double) ref_norm);
+
+ if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+ {
+ fprintf (stderr, "math error\n");
+ exit (EXIT_FAILURE);
+ }
+
+ free (h_X);
+ free (h_Y1);
+ free (h_Y2);
+
+ acc_free (d_X);
+ acc_free (d_Y);
+
+ context_check (pctx);
+
+ s = cublasDestroy (h);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasDestroy failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ acc_shutdown (acc_device_nvidia);
+
+ r = cuCtxGetCurrent (&pctx);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ if (pctx)
+ {
+ fprintf (stderr, "Unexpected context\n");
+ exit (EXIT_FAILURE);
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c
new file mode 100644
index 0000000..71365e8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c
@@ -0,0 +1,213 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda -lcublas -lcudart" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cublas_v2.h>
+#include <openacc.h>
+
+void
+saxpy (int n, float a, float *x, float *y)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ {
+ y[i] = a * x[i] + y[i];
+ }
+}
+
+void
+context_check (CUcontext ctx1)
+{
+ CUcontext ctx2, ctx3;
+ CUresult r;
+
+ r = cuCtxGetCurrent (&ctx2);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ if (ctx1 != ctx2)
+ {
+ fprintf (stderr, "new context established\n");
+ exit (EXIT_FAILURE);
+ }
+
+ ctx3 = (CUcontext) acc_get_current_cuda_context ();
+
+ if (ctx1 != ctx3)
+ {
+ fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n");
+ exit (EXIT_FAILURE);
+ }
+
+ return;
+}
+
+int
+main (int argc, char **argv)
+{
+ cublasStatus_t s;
+ cublasHandle_t h;
+ CUcontext pctx;
+ CUresult r;
+ int i;
+ const int N = 256;
+ float *h_X, *h_Y1, *h_Y2;
+ float *d_X,*d_Y;
+ float alpha = 2.0f;
+ float error_norm;
+ float ref_norm;
+
+ /* Test 4 - OpenACC creates, cuBLAS shares. */
+
+ acc_set_device_num (0, acc_device_nvidia);
+
+ r = cuCtxGetCurrent (&pctx);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ h_X = (float *) malloc (N * sizeof (float));
+ if (h_X == 0)
+ {
+ fprintf (stderr, "malloc failed: for h_X\n");
+ exit (EXIT_FAILURE);
+ }
+
+ h_Y1 = (float *) malloc (N * sizeof (float));
+ if (h_Y1 == 0)
+ {
+ fprintf (stderr, "malloc failed: for h_Y1\n");
+ exit (EXIT_FAILURE);
+ }
+
+ h_Y2 = (float *) malloc (N * sizeof (float));
+ if (h_Y2 == 0)
+ {
+ fprintf (stderr, "malloc failed: for h_Y2\n");
+ exit (EXIT_FAILURE);
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ h_X[i] = rand () / (float) RAND_MAX;
+ h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
+ }
+
+#pragma acc parallel copyin (h_X[0:N]), copy (h_Y2[0:N]) copy (alpha)
+ {
+ int i;
+
+ for (i = 0; i < N; i++)
+ {
+ h_Y2[i] = alpha * h_X[i] + h_Y2[i];
+ }
+ }
+
+ r = cuCtxGetCurrent (&pctx);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
+ if (d_X == NULL)
+ {
+ fprintf (stderr, "copyin error h_X\n");
+ exit (EXIT_FAILURE);
+ }
+
+ d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
+ if (d_Y == NULL)
+ {
+ fprintf (stderr, "copyin error h_Y1\n");
+ exit (EXIT_FAILURE);
+ }
+
+ s = cublasCreate (&h);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasCreate failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasSaxpy failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));
+
+ context_check (pctx);
+
+ error_norm = 0;
+ ref_norm = 0;
+
+ for (i = 0; i < N; ++i)
+ {
+ float diff;
+
+ diff = h_Y1[i] - h_Y2[i];
+ error_norm += diff * diff;
+ ref_norm += h_Y2[i] * h_Y2[i];
+ }
+
+ error_norm = (float) sqrt ((double) error_norm);
+ ref_norm = (float) sqrt ((double) ref_norm);
+
+ if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
+ {
+ fprintf (stderr, "math error\n");
+ exit (EXIT_FAILURE);
+ }
+
+ free (h_X);
+ free (h_Y1);
+ free (h_Y2);
+
+ acc_free (d_X);
+ acc_free (d_Y);
+
+ context_check (pctx);
+
+ s = cublasDestroy (h);
+ if (s != CUBLAS_STATUS_SUCCESS)
+ {
+ fprintf (stderr, "cublasDestroy failed: %d\n", s);
+ exit (EXIT_FAILURE);
+ }
+
+ context_check (pctx);
+
+ acc_shutdown (acc_device_nvidia);
+
+ r = cuCtxGetCurrent (&pctx);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
+ exit (EXIT_FAILURE);
+ }
+
+ if (pctx)
+ {
+ fprintf (stderr, "Unexpected context\n");
+ exit (EXIT_FAILURE);
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-1.c
new file mode 100644
index 0000000..e7564cc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-1.c
@@ -0,0 +1,188 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int i;
+
+int
+is_mapped (void *p, size_t n)
+{
+#if ACC_MEM_SHARED
+ return 1;
+#else
+ return acc_is_present (p, n);
+#endif
+}
+
+int main(void)
+{
+ int j;
+
+ i = -1;
+ j = -2;
+#pragma acc data copyin (i, j)
+ {
+ if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+ abort ();
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ }
+ if (i != 2 || j != 1)
+ abort ();
+
+ i = -1;
+ j = -2;
+#pragma acc data copyout (i, j)
+ {
+ if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+ abort ();
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+
+#pragma acc parallel present (i, j)
+ {
+ i = 4;
+ j = 2;
+ }
+ }
+ if (i != 4 || j != 2)
+ abort ();
+
+ i = -1;
+ j = -2;
+#pragma acc data create (i, j)
+ {
+ if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+ abort ();
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ }
+ if (i != 2 || j != 1)
+ abort ();
+
+ i = -1;
+ j = -2;
+#pragma acc data present_or_copyin (i, j)
+ {
+ if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+ abort ();
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ }
+ if (i != 2 || j != 1)
+ abort ();
+
+ i = -1;
+ j = -2;
+#pragma acc data present_or_copyout (i, j)
+ {
+ if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+ abort ();
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+
+#pragma acc parallel present (i, j)
+ {
+ i = 4;
+ j = 2;
+ }
+ }
+ if (i != 4 || j != 2)
+ abort ();
+
+ i = -1;
+ j = -2;
+#pragma acc data present_or_copy (i, j)
+ {
+ if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+ abort ();
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ }
+#if ACC_MEM_SHARED
+ if (i != 2 || j != 1)
+ abort ();
+#else
+ if (i != -1 || j != -2)
+ abort ();
+#endif
+
+ i = -1;
+ j = -2;
+#pragma acc data present_or_create (i, j)
+ {
+ if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ }
+
+ if (i != 2 || j != 1)
+ abort ();
+
+ i = -1;
+ j = -2;
+#pragma acc data copyin (i, j)
+ {
+#pragma acc data present (i, j)
+ {
+ if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j)))
+ abort ();
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ }
+ }
+ if (i != 2 || j != 1)
+ abort ();
+
+ i = -1;
+ j = -2;
+#pragma acc data
+ {
+#if !ACC_MEM_SHARED
+ if (is_mapped (&i, sizeof (i)) || is_mapped (&j, sizeof (j)))
+ abort ();
+#endif
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ }
+ if (i != 2 || j != 1)
+ abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
new file mode 100644
index 0000000..f867a66
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c
@@ -0,0 +1,162 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main (int argc, char **argv)
+{
+ int N = 128; //1024 * 1024;
+ float *a, *b, *c, *d, *e;
+ int i;
+ int nbytes;
+
+ nbytes = N * sizeof (float);
+
+ a = (float *) malloc (nbytes);
+ b = (float *) malloc (nbytes);
+ c = (float *) malloc (nbytes);
+ d = (float *) malloc (nbytes);
+ e = (float *) malloc (nbytes);
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async
+#pragma acc parallel async wait
+#pragma acc loop
+ for (i = 0; i < N; i++)
+ b[i] = a[i];
+
+#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) wait async
+#pragma acc wait
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 3.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async (1)
+#pragma acc parallel async (1)
+#pragma acc loop
+ for (i = 0; i < N; i++)
+ b[i] = a[i];
+
+#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) wait (1) async (1)
+#pragma acc wait (1)
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort ();
+
+ if (b[i] != 2.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ }
+
+#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (c[0:N]) copyin (d[0:N]) copyin (N) async (1)
+
+#pragma acc parallel async (1) wait (1)
+#pragma acc loop
+ for (i = 0; i < N; i++)
+ b[i] = (a[i] * a[i] * a[i]) / a[i];
+
+#pragma acc parallel async (2) wait (1)
+#pragma acc loop
+ for (i = 0; i < N; i++)
+ c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i];
+
+#pragma acc parallel async (3) wait (1)
+#pragma acc loop
+ for (i = 0; i < N; i++)
+ d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
+
+#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) copyout (d[0:N]) wait (1, 2, 3) async (1)
+#pragma acc wait (1)
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 9.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (c[0:N]) copyin (d[0:N]) copyin (e[0:N]) copyin (N) async (1)
+
+#pragma acc parallel async (1) wait (1)
+ for (int ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+
+#pragma acc parallel async (2) wait (1)
+ for (int ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+
+#pragma acc parallel async (3) wait (1)
+ for (int ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+
+#pragma acc parallel wait (1) async (4)
+ for (int ii = 0; ii < N; ii++)
+ e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+
+#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1)
+#pragma acc wait (1)
+
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort ();
+
+ if (b[i] != 4.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+
+ if (e[i] != 11.0)
+ abort ();
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c
new file mode 100644
index 0000000..747109f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-3.c
@@ -0,0 +1,166 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int
+main (int argc, char **argv)
+{
+ int N = 128; //1024 * 1024;
+ float *a, *b, *c, *d, *e;
+ int i;
+ int nbytes;
+
+ nbytes = N * sizeof (float);
+
+ a = (float *) malloc (nbytes);
+ b = (float *) malloc (nbytes);
+ c = (float *) malloc (nbytes);
+ d = (float *) malloc (nbytes);
+ e = (float *) malloc (nbytes);
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async
+#pragma acc parallel async wait
+#pragma acc loop
+ for (i = 0; i < N; i++)
+ b[i] = a[i];
+
+#pragma acc update host (a[0:N], b[0:N]) async wait
+#pragma acc wait
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 3.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc update device (a[0:N], b[0:N]) async (1)
+#pragma acc parallel async (1)
+#pragma acc loop
+ for (i = 0; i < N; i++)
+ b[i] = a[i];
+
+#pragma acc update host (a[0:N], b[0:N]) async (1) wait (1)
+#pragma acc wait (1)
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort ();
+
+ if (b[i] != 2.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ }
+
+#pragma acc update device (a[0:N]) async (1)
+#pragma acc update device (b[0:N]) async (2)
+#pragma acc enter data copyin (c[0:N], d[0:N]) async (3)
+
+#pragma acc parallel async (1) wait (1,2)
+#pragma acc loop
+ for (i = 0; i < N; i++)
+ b[i] = (a[i] * a[i] * a[i]) / a[i];
+
+#pragma acc parallel async (2) wait (1,3)
+#pragma acc loop
+ for (i = 0; i < N; i++)
+ c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i];
+
+#pragma acc parallel async (3) wait (1,3)
+#pragma acc loop
+ for (i = 0; i < N; i++)
+ d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N], d[0:N]) async (1) wait (1,2,3)
+#pragma acc wait (1)
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 9.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc update device (a[0:N], b[0:N], c[0:N], d[0:N]) async (1)
+#pragma acc enter data copyin (e[0:N]) async (5)
+
+#pragma acc parallel async (1) wait (1)
+ for (int ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+
+#pragma acc parallel async (2) wait (1)
+ for (int ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+
+#pragma acc parallel async (3) wait (1)
+ for (int ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+
+#pragma acc parallel wait (1,5) async (4)
+ for (int ii = 0; ii < N; ii++)
+ e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+
+#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1)
+#pragma acc exit data delete (N)
+#pragma acc wait (1)
+
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort ();
+
+ if (b[i] != 4.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+
+ if (e[i] != 11.0)
+ abort ();
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-1.c
new file mode 100644
index 0000000..83c0a42
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-1.c
@@ -0,0 +1,19 @@
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+
+int
+main (int argc, char *argv[])
+{
+ int i;
+
+ acc_copyin (&i, sizeof i);
+
+#pragma acc data copy (i)
+ ++i;
+
+ return 0;
+}
+
+/* { dg-shouldfail "" }
+ { dg-output "Trying to map into device .* object when .* is already mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-2.c
new file mode 100644
index 0000000..137d8ce
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-2.c
@@ -0,0 +1,16 @@
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+int
+main (int argc, char *argv[])
+{
+ int i;
+
+#pragma acc data present_or_copy (i)
+#pragma acc data copyout (i)
+ ++i;
+
+ return 0;
+}
+
+/* { dg-shouldfail "" }
+ { dg-output "Trying to map into device .* object when .* is already mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-3.c
new file mode 100644
index 0000000..b993b78
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-3.c
@@ -0,0 +1,17 @@
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+
+int
+main (int argc, char *argv[])
+{
+ int i;
+
+#pragma acc data present_or_copy (i)
+ acc_copyin (&i, sizeof i);
+
+ return 0;
+}
+
+/* { dg-shouldfail "" }
+ { dg-output "already mapped to" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-4.c
new file mode 100644
index 0000000..82523f4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-4.c
@@ -0,0 +1,17 @@
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+
+int
+main (int argc, char *argv[])
+{
+ int i;
+
+ acc_present_or_copyin (&i, sizeof i);
+ acc_copyin (&i, sizeof i);
+
+ return 0;
+}
+
+/* { dg-shouldfail "" }
+ { dg-output "already mapped to" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-5.c
new file mode 100644
index 0000000..4961fe5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-5.c
@@ -0,0 +1,17 @@
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+
+int
+main (int argc, char *argv[])
+{
+ int i;
+
+#pragma acc enter data create (i)
+ acc_copyin (&i, sizeof i);
+
+ return 0;
+}
+
+/* { dg-shouldfail "" }
+ { dg-output "already mapped to" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-6.c
new file mode 100644
index 0000000..77b56a9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-6.c
@@ -0,0 +1,17 @@
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+
+int
+main (int argc, char *argv[])
+{
+ int i;
+
+ acc_present_or_copyin (&i, sizeof i);
+#pragma acc enter data create (i)
+
+ return 0;
+}
+
+/* { dg-shouldfail "" }
+ { dg-output "already mapped to" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-7.c
new file mode 100644
index 0000000..b08417b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-7.c
@@ -0,0 +1,17 @@
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+
+int
+main (int argc, char *argv[])
+{
+ int i;
+
+#pragma acc enter data create (i)
+ acc_create (&i, sizeof i);
+
+ return 0;
+}
+
+/* { dg-shouldfail "" }
+ { dg-output "already mapped to" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-8.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-8.c
new file mode 100644
index 0000000..a50f7de
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-already-8.c
@@ -0,0 +1,16 @@
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+int
+main (int argc, char *argv[])
+{
+ int i;
+
+#pragma acc data create (i)
+#pragma acc parallel copyin (i)
+ ++i;
+
+ return 0;
+}
+
+/* { dg-shouldfail "" }
+ { dg-output "Trying to map into device .* object when .* is already mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c
new file mode 100644
index 0000000..e271a37
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int main (void)
+{
+ void *a, *a_1, *a_2;
+
+#define A (void *) 0x123
+ a = A;
+
+#pragma acc data copyout (a_1, a_2)
+#pragma acc kernels deviceptr (a)
+ {
+ a_1 = a;
+ a_2 = &a;
+ }
+
+ if (a != A)
+ abort ();
+ if (a_1 != a)
+ abort ();
+#if ACC_MEM_SHARED
+ if (a_2 != &a)
+ abort ();
+#else
+ if (a_2 == &a)
+ abort ();
+#endif
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c
new file mode 100644
index 0000000..184b355
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c
@@ -0,0 +1,613 @@
+/* { dg-do run } */
+/* { dg-additional-options "-fno-builtin-acc_on_device" } */
+
+#include <openacc.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define N 32
+
+int
+main(int argc, char **argv)
+{
+ float *a, *b, *d_a, *d_b, exp, exp2;
+ int i;
+ const int one = 1;
+ const int zero = 0;
+ int n;
+
+ a = (float *) malloc (N * sizeof (float));
+ b = (float *) malloc (N * sizeof (float));
+ d_a = (float *) acc_malloc (N * sizeof (float));
+ d_b = (float *) acc_malloc (N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ a[i] = 4.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 5.0;
+#else
+ exp = 4.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 16.0;
+
+#pragma acc parallel if(0)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 17.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 8.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(one)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 9.0;
+#else
+ exp = 8.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 22.0;
+
+#pragma acc parallel if(zero)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 23.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 16.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(true)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 17.0;
+#else
+ exp = 16.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 76.0;
+
+#pragma acc parallel if(false)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 77.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 22.0;
+
+ n = 1;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 23.0;
+#else
+ exp = 22.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 18.0;
+
+ n = 0;
+
+#pragma acc parallel if(n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 19.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 49.0;
+
+ n = 1;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(n + n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 50.0;
+#else
+ exp = 49.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 38.0;
+
+ n = 0;
+
+#pragma acc parallel if(n + n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 39.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 91.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(-2)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 92.0;
+#else
+ exp = 91.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 43.0;
+
+#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(one == 1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 44.0;
+#else
+ exp = 43.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 87.0;
+
+#pragma acc parallel if(one == 0)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 88.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 9.0;
+ }
+
+#if ACC_MEM_SHARED
+ exp = 0.0;
+ exp2 = 0.0;
+#else
+ acc_map_data (a, d_a, N * sizeof (float));
+ acc_map_data (b, d_b, N * sizeof (float));
+ exp = 3.0;
+ exp2 = 9.0;
+#endif
+
+#pragma acc update device(a[0:N], b[0:N]) if(1)
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 0.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc update host(a[0:N], b[0:N]) if(1)
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != exp)
+ abort();
+
+ if (b[i] != exp2)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 12.0;
+ }
+
+#pragma acc update device(a[0:N], b[0:N]) if(0)
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 0.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc update host(a[0:N], b[0:N]) if(1)
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != exp)
+ abort();
+
+ if (b[i] != exp2)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 26.0;
+ b[i] = 21.0;
+ }
+
+#pragma acc update device(a[0:N], b[0:N]) if(1)
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 0.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc update host(a[0:N], b[0:N]) if(0)
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 0.0)
+ abort();
+
+ if (b[i] != 0.0)
+ abort();
+ }
+
+#if !ACC_MEM_SHARED
+ acc_unmap_data (a);
+ acc_unmap_data (b);
+#endif
+
+ acc_free (d_a);
+ acc_free (d_b);
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(1)
+{
+#pragma acc parallel present(a[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ b[ii] = a[ii];
+ }
+ }
+}
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 4.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 8.0;
+ b[i] = 1.0;
+ }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(0)
+{
+#if !ACC_MEM_SHARED
+ if (acc_is_present (a, N * sizeof (float)))
+ abort ();
+#endif
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+}
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 18.0;
+ b[i] = 21.0;
+ }
+
+#pragma acc data copyin(a[0:N]) if(1)
+{
+#if !ACC_MEM_SHARED
+ if (!acc_is_present (a, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc data copyout(b[0:N]) if(0)
+ {
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc data copyout(b[0:N]) if(1)
+ {
+#pragma acc parallel present(a[0:N]) present(b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ b[ii] = a[ii];
+ }
+ }
+ }
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+ }
+}
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 18.0)
+ abort ();
+ }
+
+#pragma acc enter data copyin (b[0:N]) if (0)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc exit data delete (b[0:N]) if (0)
+
+#pragma acc enter data copyin (b[0:N]) if (1)
+
+#if !ACC_MEM_SHARED
+ if (!acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc exit data delete (b[0:N]) if (1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc enter data copyin (b[0:N]) if (zero)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc exit data delete (b[0:N]) if (zero)
+
+#pragma acc enter data copyin (b[0:N]) if (one)
+
+#if !ACC_MEM_SHARED
+ if (!acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc exit data delete (b[0:N]) if (one)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc enter data copyin (b[0:N]) if (one == 0)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc exit data delete (b[0:N]) if (one == 0)
+
+#pragma acc enter data copyin (b[0:N]) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (!acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+#pragma acc exit data delete (b[0:N]) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b, N * sizeof (float)))
+ abort ();
+#endif
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-1.c
new file mode 100644
index 0000000..3acfdf5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-1.c
@@ -0,0 +1,184 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+int i;
+
+int main (void)
+{
+ int j, v;
+
+#if 0
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) copyin (i, j)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != -1 || j != -2)
+ abort ();
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) copyout (i, j)
+ {
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) copy (i, j)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) create (i, j)
+ {
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != -1 || j != -2)
+ abort ();
+#endif
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1)
+ abort ();
+#if ACC_MEM_SHARED
+ if (i != 2 || j != 1)
+ abort ();
+#else
+ if (i != -1 || j != -2)
+ abort ();
+#endif
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
+ {
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copy (i, j)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_create (i, j)
+ {
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1)
+ abort ();
+#if ACC_MEM_SHARED
+ if (i != 2 || j != 1)
+ abort ();
+#else
+ if (i != -1 || j != -2)
+ abort ();
+#endif
+
+#if 0
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v) present (i, j)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+#endif
+
+#if 0
+ i = -1;
+ j = -2;
+ v = 0;
+#pragma acc kernels /* copyout */ present_or_copyout (v)
+ {
+ if (i != -1 || j != -2)
+ abort ();
+ i = 2;
+ j = 1;
+ if (i != 2 || j != 1)
+ abort ();
+ v = 1;
+ }
+ if (v != 1 || i != 2 || j != 1)
+ abort ();
+#endif
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-empty.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-empty.c
new file mode 100644
index 0000000..a68a7cd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-empty.c
@@ -0,0 +1,6 @@
+int
+main (void)
+{
+#pragma acc kernels
+ ;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-1.c
new file mode 100644
index 0000000..17129d8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-1.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ acc_device_t devtype = acc_device_host;
+
+#if ACC_DEVICE_TYPE_nvidia
+ devtype = acc_device_nvidia;
+
+ if (acc_get_num_devices (devtype) == 0)
+ return 0;
+#endif
+
+ acc_init (devtype);
+
+ acc_init (devtype);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: device already active" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-10.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-10.c
new file mode 100644
index 0000000..cf1af8c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-10.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ void *d;
+ acc_device_t devtype = acc_device_host;
+
+#if ACC_DEVICE_TYPE_nvidia
+ devtype = acc_device_nvidia;
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+#endif
+
+ acc_init (devtype);
+
+ d = acc_malloc (0);
+ if (d != NULL)
+ abort ();
+
+ acc_free (0);
+
+ acc_shutdown (devtype);
+
+ acc_set_device_type (devtype);
+
+ d = acc_malloc (0);
+ if (d != NULL)
+ abort ();
+
+ acc_shutdown (devtype);
+
+ acc_init (devtype);
+
+ d = acc_malloc (1024);
+ if (d == NULL)
+ abort ();
+
+ acc_free (d);
+
+ acc_shutdown (devtype);
+
+ acc_set_device_type (devtype);
+
+ d = acc_malloc (1024);
+ if (d == NULL)
+ abort ();
+
+ acc_free (d);
+
+ acc_shutdown (devtype);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-11.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-11.c
new file mode 100644
index 0000000..eccdb8c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-11.c
@@ -0,0 +1,23 @@
+/* Only nvptx plugin does the required error checking.
+ { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+#include <stdint.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 512;
+ void *d;
+
+ d = acc_malloc (N);
+ if (d == NULL)
+ abort ();
+
+ acc_free ((void *)((uintptr_t) d + (uintptr_t) (N >> 1)));
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: mem free failed 1" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-12.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-12.c
new file mode 100644
index 0000000..b46f590
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-12.c
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ memset (h, 0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c
new file mode 100644
index 0000000..7098ef3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-13.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+
+ if (acc_is_present (h, 1) != 1)
+ abort ();
+
+ if (acc_is_present (h, N + 1) != 0)
+ abort ();
+
+ if (acc_is_present (h + 1, N) != 0)
+ abort ();
+
+ if (acc_is_present (h - 1, N) != 0)
+ abort ();
+
+ if (acc_is_present (h - 1, N - 1) != 0)
+ abort ();
+
+ if (acc_is_present (h + N, 0) != 0)
+ abort ();
+
+ if (acc_is_present (h + N, N) != 0)
+ abort ();
+
+ if (acc_is_present (0, N) != 0)
+ abort ();
+
+ if (acc_is_present (h, 0) != 0)
+ abort ();
+
+ acc_free (d);
+
+ if (acc_is_present (h, 1) != 0)
+ abort ();
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c
new file mode 100644
index 0000000..a9632f7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-14.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+
+ if (acc_is_present (h, 1) != 1)
+ abort ();
+
+ if (acc_is_present (h + N - 1, 1) != 1)
+ abort ();
+
+ if (acc_is_present (h - 1, 1) != 0)
+ abort ();
+
+ if (acc_is_present (h + N, 1) != 0)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 1)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, N - i) != 1)
+ abort ();
+ }
+
+ acc_free (d);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, N - i) != 0)
+ abort ();
+ }
+
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c
new file mode 100644
index 0000000..4f6a731
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-15.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 0)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-16.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-16.c
new file mode 100644
index 0000000..9d277ac
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-16.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ (void) acc_copyin (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\] already mapped to \[\h+,\+256\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-17.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-17.c
new file mode 100644
index 0000000..5ff894c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-17.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ acc_copyout (h, N);
+
+ acc_copyout (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-18.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-18.c
new file mode 100644
index 0000000..2bc32637
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-18.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+
+ acc_free (d);
+
+ acc_copyout (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-19.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-19.c
new file mode 100644
index 0000000..3581616
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-19.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+#include <stdio.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h[N];
+
+ for (i = 0; i < N; i++)
+ {
+ int j;
+ unsigned char *p;
+
+ h[i] = (unsigned char *) malloc (N);
+ p = h[i];
+
+ for (j = 0; j < N; j++)
+ {
+ p[j] = i;
+ }
+
+ (void) acc_copyin (p, N);
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ memset (h[i], 0, N); /* Clobber all N host bytes so the check below cannot pass on stale host data. */
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ int j;
+ unsigned char *p;
+
+ acc_copyout (h[i], N); /* The test expects this to bring back the pattern written before acc_copyin. */
+
+ p = h[i];
+
+ for (j = 0; j < N; j++)
+ {
+ if (p[j] != i)
+ abort ();
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ free (h[i]);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-2.c
new file mode 100644
index 0000000..9a4501f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-2.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ acc_device_t devtype = acc_device_host;
+
+#if ACC_DEVICE_TYPE_nvidia
+ devtype = acc_device_nvidia;
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+#endif
+
+ acc_init (devtype);
+
+ acc_shutdown (devtype);
+
+ acc_shutdown (devtype);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: no device initialized" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c
new file mode 100644
index 0000000..b379a8f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-20.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ acc_copyout (h, N + 1);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+257\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-21.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-21.c
new file mode 100644
index 0000000..3a67400
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-21.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ acc_copyout (h, 0);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-22.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-22.c
new file mode 100644
index 0000000..2b86da8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-22.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ (void) acc_copyin (h, N);
+
+ acc_copyout (h + 1, N - 1);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+255\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c
new file mode 100644
index 0000000..38f236d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-23.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h1, *h2;
+
+ h1 = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h1[i] = 0xab;
+ }
+
+ (void) acc_copyin (h1, N);
+
+ h2 = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h2[i] = 0xde;
+ }
+
+ (void) acc_copyin (h2, N);
+
+ acc_copyout (h1, N + N);
+
+ free (h1);
+ free (h2);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+512\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c
new file mode 100644
index 0000000..d7de8e3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-24.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 1)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 0)
+ abort ();
+ }
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 1)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 0)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-25.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-25.c
new file mode 100644
index 0000000..1145828
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-25.c
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] already mapped to \[\h+,256\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-26.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-26.c
new file mode 100644
index 0000000..a23f56e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-26.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, 0);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-27.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-27.c
new file mode 100644
index 0000000..074fddb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-27.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (0, N);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),\+256\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-28.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-28.c
new file mode 100644
index 0000000..027f7cc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-28.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ acc_delete (0, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-29.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-29.c
new file mode 100644
index 0000000..a66de0f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-29.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ acc_delete (h, 0);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-3.c
new file mode 100644
index 0000000..e823a41
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-3.c
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ acc_init (acc_device_host);
+
+ acc_shutdown (acc_device_not_host);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: device 4(4) is initialized" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-30.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-30.c
new file mode 100644
index 0000000..ce2bdb4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-30.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_create (h, N);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N - 2);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+254\]" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-31.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-31.c
new file mode 100644
index 0000000..25ce5a9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-31.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_present_or_create (h, N);
+ if (!d)
+ abort ();
+
+ if (acc_is_present (h, 1) != 1)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-32.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-32.c
new file mode 100644
index 0000000..e3f87a8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-32.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d1, *d2;
+
+ h = (unsigned char *) malloc (N);
+
+ d1 = acc_present_or_create (h, N);
+ if (!d1)
+ abort ();
+
+ d2 = acc_present_or_create (h, N);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ d2 = acc_pcreate (h, N);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-33.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-33.c
new file mode 100644
index 0000000..4abaa02
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-33.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d1, *d2;
+
+ h = (unsigned char *) malloc (N);
+
+ d1 = acc_present_or_create (h, N);
+ if (!d1)
+ abort ();
+
+ d2 = acc_present_or_create (h, N - 2);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c
new file mode 100644
index 0000000..32d5d51
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-34.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d1, *d2;
+
+ h = (unsigned char *) malloc (N);
+
+ d1 = acc_present_or_create (h, N);
+ if (!d1)
+ abort ();
+
+ d2 = acc_present_or_create (h + 2, N);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\] not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-35.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-35.c
new file mode 100644
index 0000000..ca8edab
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-35.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_present_or_create (0, N);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),\+256\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-36.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-36.c
new file mode 100644
index 0000000..cb29397
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-36.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_present_or_create (h, 0);
+ if (!d)
+ abort ();
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-37.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-37.c
new file mode 100644
index 0000000..5a7d533
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-37.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_present_or_copyin (h, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-38.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-38.c
new file mode 100644
index 0000000..05d8498
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-38.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d1, *d2;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d1 = acc_present_or_copyin (h, N);
+ if (!d1)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = 0xab;
+ }
+
+ d2 = acc_present_or_copyin (h, N);
+ if (!d2)
+ abort ();
+
+ if (d1 != d2)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ d2 = acc_pcopyin (h, N);
+ if (!d2)
+ abort ();
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-39.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-39.c
new file mode 100644
index 0000000..db1e0b3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-39.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_present_or_copyin (0, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),\+256\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-4.c
new file mode 100644
index 0000000..060275b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-4.c
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ acc_init ((acc_device_t) 99);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: device 99 is out of range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-40.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-40.c
new file mode 100644
index 0000000..cb6c422
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-40.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_present_or_copyin (h, 0);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-41.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-41.c
new file mode 100644
index 0000000..01c5f3c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-41.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = 0xab;
+ }
+
+ acc_update_device (h, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != 0xab)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c
new file mode 100644
index 0000000..d577fe3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-42.c
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ acc_update_device (h, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != 0xab)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-43.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-43.c
new file mode 100644
index 0000000..ceeb155
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-43.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = 0xab;
+ }
+
+ acc_update_device (0, N);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != 0xab)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c
new file mode 100644
index 0000000..0cabb0d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-44.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = 0xab;
+ }
+
+ acc_update_device (h, 0);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != 0xab)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-45.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-45.c
new file mode 100644
index 0000000..f9a6294
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-45.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = 0xab;
+ }
+
+ acc_update_device (h, N - 2);
+
+ acc_copyout (h, N);
+
+ for (i = 0; i < N - 2; i++)
+ {
+ if (h[i] != 0xab)
+ abort ();
+ }
+
+ for (i = N - 2; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-46.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-46.c
new file mode 100644
index 0000000..b195725
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-46.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_update_self (h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-47.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-47.c
new file mode 100644
index 0000000..a7ff904
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-47.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_update_self (0, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c
new file mode 100644
index 0000000..01d3c6c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-48.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_update_self (h, 0);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-49.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-49.c
new file mode 100644
index 0000000..a33324c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-49.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_copyin (h, N);
+ if (!d)
+ abort ();
+
+ memset (&h[0], 0, N);
+
+ acc_update_self (h, N - 2);
+
+ for (i = 0; i < N - 2; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ for (i = N - 2; i < N; i++)
+ {
+ if (h[i] != 0)
+ abort ();
+ }
+
+ acc_delete (h, N);
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-5.c
new file mode 100644
index 0000000..961a62c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-5.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ if (acc_get_device_type () == acc_device_default)
+ abort ();
+
+ acc_init (acc_device_default);
+
+ if (acc_get_device_type () == acc_device_default)
+ abort ();
+
+ acc_shutdown (acc_device_default);
+
+ if (acc_get_num_devices (acc_device_nvidia) != 0)
+ {
+ acc_init (acc_device_nvidia);
+
+ if (acc_get_device_type () != acc_device_nvidia)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ acc_init (acc_device_default);
+
+ acc_set_device_type (acc_device_nvidia);
+
+ if (acc_get_device_type () != acc_device_nvidia)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+ }
+
+ return 0;
+
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-50.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-50.c
new file mode 100644
index 0000000..e8294e1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-50.c
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N);
+
+ if (acc_is_present (h, N) != 1)
+ abort ();
+
+ acc_unmap_data (h);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-51.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-51.c
new file mode 100644
index 0000000..29d28f2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-51.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h[N];
+ void *d[N];
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = (unsigned char *) malloc (N);
+ d[i] = acc_malloc (N);
+
+ acc_map_data (h[i], d[i], N);
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h[i], N) != 1)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ acc_unmap_data (h[i]);
+
+ if (acc_is_present (h[i], N) != 0)
+ abort ();
+
+ acc_free (d[i]);
+ free (h[i]);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c
new file mode 100644
index 0000000..780db31
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-52.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (0, d, N);
+
+ acc_unmap_data (h);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\(nil\),\+256\]->\[\h+,\+256\] is a bad map" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c
new file mode 100644
index 0000000..657adde
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-53.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, 0, N);
+
+ acc_unmap_data (h);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+256\]->\[\(nil\),\+256\] is a bad map" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c
new file mode 100644
index 0000000..1f3df80
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-54.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, 0);
+
+ acc_unmap_data (h);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \[\h+,\+0\]->\[\h+,\+0\] is a bad map" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-55.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-55.c
new file mode 100644
index 0000000..286653f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-55.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+#include <stdint.h>
+
+/* Map an N-byte host buffer onto device memory one byte at a time, then
+ check that every individual byte is reported present while mapped and
+ absent again once all of the single-byte mappings have been removed. */
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ int i;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ /* Create N independent one-byte mappings: host byte i <-> device byte i. */
+ for (i = 0; i < N; i++)
+ {
+ acc_map_data ((void *)((uintptr_t) h + (uintptr_t) i),
+ (void *)((uintptr_t) d + (uintptr_t) i), 1);
+ }
+
+ /* Query byte i on each iteration so that all N mappings are checked. */
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 1)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ acc_unmap_data (h + i);
+ }
+
+ /* After unmapping, no byte of the buffer may still be reported present. */
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 0)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-56.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-56.c
new file mode 100644
index 0000000..e3f5a80
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-56.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N >> 1);
+
+ if (acc_is_present (h, 1) != 1)
+ abort ();
+
+ if (acc_is_present (h + (N >> 1), 1) != 0)
+ abort ();
+
+ acc_unmap_data (h);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-57.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-57.c
new file mode 100644
index 0000000..f9043a4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-57.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N);
+
+ acc_unmap_data (d);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \h+ is not a mapped block" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-58.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-58.c
new file mode 100644
index 0000000..9d6e27d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-58.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N);
+
+ acc_unmap_data (0);
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: \(nil\) is not a mapped block" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-59.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-59.c
new file mode 100644
index 0000000..2f087ae
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-59.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+#include <stdint.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_hostptr ((void *)((uintptr_t) d + (uintptr_t) i)) !=
+ (void *)((uintptr_t) h + (uintptr_t) i))
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_deviceptr ((void *)((uintptr_t) h + (uintptr_t) i)) !=
+ (void *)((uintptr_t) d + (uintptr_t) i))
+ abort ();
+ }
+
+ acc_unmap_data (h);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_hostptr ((void *)((uintptr_t) d + (uintptr_t) i)) != 0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_deviceptr (h + i) != 0)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-6.c
new file mode 100644
index 0000000..afdd480
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-6.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ int devnum;
+
+ if (acc_get_device_type () == acc_device_default)
+ abort ();
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ acc_set_device_type (acc_device_nvidia);
+
+ if (acc_get_device_type () != acc_device_nvidia)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ acc_set_device_type (acc_device_nvidia);
+
+ if (acc_get_device_type () != acc_device_nvidia)
+ abort ();
+
+ devnum = acc_get_num_devices (acc_device_host);
+ if (devnum != 1)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ if (acc_get_device_type () == acc_device_default)
+ abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-60.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-60.c
new file mode 100644
index 0000000..ccae728e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-60.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (d, h, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 0)
+ abort ();
+ }
+
+ memset (&h[0], 0, N);
+
+ acc_memcpy_from_device (h, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (acc_is_present (h + i, 1) != 0)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-61.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-61.c
new file mode 100644
index 0000000..ce66ced
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-61.c
@@ -0,0 +1,70 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h[N];
+ void *d[N];
+
+ for (i = 0; i < N; i++)
+ {
+ int j;
+ unsigned char *p;
+
+ h[i] = (unsigned char *) malloc (N);
+
+ p = h[i];
+
+ for (j = 0; j < N; j++)
+ {
+ p[j] = i;
+ }
+
+ d[i] = acc_malloc (N);
+
+ acc_memcpy_to_device (d[i], h[i], N);
+
+ for (j = 0; j < N; j++)
+ {
+ if (acc_is_present (h[i] + j, 1) != 0)
+ abort ();
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ int j;
+ unsigned char *p;
+
+ memset (h[i], 0, N);
+
+ acc_memcpy_from_device (h[i], d[i], N);
+
+ p = h[i];
+
+ for (j = 0; j < N; j++)
+ {
+ if (p[j] != i)
+ abort ();
+ }
+
+ for (j = 0; j < N; j++)
+ {
+ if (acc_is_present (h[i] + j, 1) != 0)
+ abort ();
+ }
+
+ acc_free (d[i]);
+
+ free (h[i]);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-62.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-62.c
new file mode 100644
index 0000000..e6178e2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-62.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ acc_init (acc_device_nvidia);
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (d, h, N);
+
+ memset (&h[0], 0, N);
+
+ acc_memcpy_to_device (d, h, N << 1);
+
+ acc_memcpy_from_device (h, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid size" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-63.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-63.c
new file mode 100644
index 0000000..ca237ec
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-63.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (0, h, N);
+
+ memset (&h[0], 0, N);
+
+ acc_memcpy_from_device (h, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid device address" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-64.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-64.c
new file mode 100644
index 0000000..850fd2e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-64.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (d, 0, N);
+
+ memset (&h[0], 0, N);
+
+ acc_memcpy_from_device (h, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid host address" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-65.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-65.c
new file mode 100644
index 0000000..26c8cef
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-65.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (d, d, N);
+
+ memset (&h[0], 0, N);
+
+ acc_memcpy_from_device (h, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid host or device address" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-66.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-66.c
new file mode 100644
index 0000000..398dc2a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-66.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+
+ acc_init (acc_device_default);
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (d, h, N);
+
+ memset (&h[0], 0, N);
+
+ acc_memcpy_to_device (d, h, 0);
+
+ acc_memcpy_from_device (h, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ acc_shutdown (acc_device_default);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-67.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-67.c
new file mode 100644
index 0000000..01b8b2d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-67.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Negative test: acc_memcpy_from_device is called with a null host destination. */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (d, h, N);
+
+ memset (&h[0], 0, N);
+ /* Host argument is 0; presumably this call raises the error named by dg-shouldfail after main. */
+ acc_memcpy_from_device (0, d, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid host address" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-68.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-68.c
new file mode 100644
index 0000000..3ff5bd7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-68.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ const int N = 256;
+ int i;
+ unsigned char *h;
+ void *d;
+ /* Negative test: acc_memcpy_from_device is called with a null device source. */
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_memcpy_to_device (d, h, N);
+
+ memset (&h[0], 0, N);
+ /* Device argument is 0; presumably this call raises the error named by dg-shouldfail after main. */
+ acc_memcpy_from_device (h, 0, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (h[i] != i)
+ abort ();
+ }
+
+ acc_free (d);
+
+ free (h);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: invalid device address" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c
new file mode 100644
index 0000000..5462f12
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c
@@ -0,0 +1,124 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+ /* Registers a user-created CUDA stream for async value 0, launches the delay kernel on it and checks acc_async_test (0) before and after sleeping. */
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+ /* Fetch the "delay" kernel from the module file "subr.ptx". */
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+ /* NOTE(review): presumably dtime is in milliseconds and clkrate in kHz, making dticks a clock-tick count -- confirm. */
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+ /* No stream may be associated with async value 0 before one is registered below. */
+ stream = (CUstream) acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ if (acc_async_test (0) != 0)
+ {
+ fprintf (stderr, "asynchronous operation not running\n");
+ abort ();
+ }
+ /* Sleep time is derived from dtime, as in the sibling lib-70..lib-73 tests, instead of being hard-coded. */
+ sleep ((int) (dtime / 1000.0f) + 1);
+
+ if (acc_async_test (0) != 1)
+ {
+ fprintf (stderr, "found asynchronous operation still running\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-7.c
new file mode 100644
index 0000000..e78734b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-7.c
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+ if (acc_get_num_devices (acc_device_none) != 0)
+ abort ();
+ /* acc_device_none must report zero devices (above); acc_device_host must report at least one (below). */
+ if (acc_get_num_devices (acc_device_host) == 0)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c
new file mode 100644
index 0000000..912b266
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c
@@ -0,0 +1,136 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ const int N = 10;
+ int i;
+ CUstream streams[N];
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+ /* Registers N user-created CUDA streams for async values 0..N-1 and checks acc_async_test on each before and after sleeping. */
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+ /* Fetch the "delay" kernel from the module file "subr.ptx". */
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+ /* NOTE(review): presumably dtime is in milliseconds and clkrate in kHz, making dticks a clock-tick count -- confirm. */
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+ /* For each async value: no stream may be set yet; then create one and register it. */
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = (CUstream) acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+ /* Launch the kernel on every stream; right after launch acc_async_test (i) is required to return 0. */
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ if (acc_async_test (i) != 0)
+ {
+ fprintf (stderr, "asynchronous operation not running\n");
+ abort ();
+ }
+ }
+
+ sleep ((int) (dtime / 1000.0f) + 1);
+ /* After sleeping, acc_async_test (i) is required to return 1 for every async value. */
+ for (i = 0; i < N; i++)
+ {
+ if (acc_async_test (i) != 1)
+ {
+ fprintf (stderr, "found asynchronous operation still running\n");
+ abort ();
+ }
+ }
+
+ acc_unmap_data (a);
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c
new file mode 100644
index 0000000..a045379
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-71.c
@@ -0,0 +1,119 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+ /* Negative test: a stream is registered for async value 0, but acc_async_test is called with async value 1. */
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+ /* Fetch the "delay" kernel from the module file "subr.ptx". */
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+ /* NOTE(review): presumably dtime is in milliseconds and clkrate in kHz, making dticks a clock-tick count -- confirm. */
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ acc_set_cuda_stream (0, stream);
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+ /* Async value 1 was never registered above; presumably this call raises the error named by dg-shouldfail after main. */
+ if (acc_async_test (1) != 0)
+ {
+ fprintf (stderr, "asynchronous operation not running\n");
+ abort ();
+ }
+
+ sleep ((int) (dtime / 1000.0f) + 1);
+
+ if (acc_async_test (1) != 1)
+ {
+ fprintf (stderr, "found asynchronous operation still running\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: unknown async \d" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c
new file mode 100644
index 0000000..e383ba0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c
@@ -0,0 +1,121 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+ /* Launches the delay kernel on a stream registered for async value 0 and checks acc_async_test_all before and after sleeping. */
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+ /* Fetch the "delay" kernel from the module file "subr.ptx". */
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+ /* NOTE(review): presumably dtime is in milliseconds and clkrate in kHz, making dticks a clock-tick count -- confirm. */
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+ /* Right after launch acc_async_test_all is required to return 0; after the sleep it is required to return 1. */
+ if (acc_async_test_all () != 0)
+ {
+ fprintf (stderr, "asynchronous operation not running\n");
+ abort ();
+ }
+
+ sleep ((int) (dtime / 1000.f) + 1);
+
+ if (acc_async_test_all () != 1)
+ {
+ fprintf (stderr, "found asynchronous operation still running\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c
new file mode 100644
index 0000000..43a8b7e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c
@@ -0,0 +1,134 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ const int N = 10;
+ int i;
+ CUstream streams[N];
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+ /* Launches the delay kernel on N registered streams and checks acc_async_test_all before and after sleeping. */
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+ /* Fetch the "delay" kernel from the module file "subr.ptx". */
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+ /* NOTE(review): presumably dtime is in milliseconds and clkrate in kHz, making dticks a clock-tick count -- confirm. */
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+ /* For each async value: no stream may be set yet; then create one and register it. */
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = (CUstream) acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ }
+
+ if (acc_async_test_all () != 0)
+ {
+ fprintf (stderr, "asynchronous operation not running\n");
+ abort ();
+ }
+
+ sleep ((int) (dtime / 1000.0f) + 1);
+ /* A result other than 1 here means work is still pending, so report that rather than "not running". */
+ if (acc_async_test_all () != 1)
+ {
+ fprintf (stderr, "found asynchronous operation still running\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c
new file mode 100644
index 0000000..0726ee4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c
@@ -0,0 +1,139 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+ /* Times acc_wait (0) on a freshly launched delay kernel, then times a second acc_wait (0) issued afterwards. */
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+ /* Fetch the "delay" kernel from the module file "subr.ptx". */
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+ /* NOTE(review): presumably dtime is in milliseconds and clkrate in kHz, making dticks a clock-tick count -- confirm. */
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+ /* No stream may be associated with async value 0 before one is registered below. */
+ stream = (CUstream) acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ init_timers (1);
+
+ start_timer (0);
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ acc_wait (0);
+
+ atime = stop_timer (0);
+ /* Launch plus wait must take at least dtime (NOTE(review): assumes stop_timer reports the same unit as dtime -- confirm in timer.h). */
+ if (atime < dtime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ start_timer (0);
+
+ acc_wait (0);
+
+ atime = stop_timer (0);
+ /* The second wait has nothing new launched before it; its measured time must not exceed 0.010. */
+ if (0.010 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c
new file mode 100644
index 0000000..1942211
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c
@@ -0,0 +1,141 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ int N;
+ int i;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime, hitime, lotime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+ /* Launches the delay kernel N times on one stream, waiting after each launch, and checks the total time is within 2% of dtime * N. */
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+ /* Fetch the "delay" kernel from the module file "subr.ptx". */
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+ /* NOTE(review): presumably dtime is in milliseconds and clkrate in kHz, making dticks a clock-tick count -- confirm. */
+ dticks = (unsigned long) (dtime * clkrate);
+
+ N = nprocs;
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+ /* No stream may be associated with async value 0 before one is registered below. */
+ stream = (CUstream) acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ init_timers (1);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ start_timer (0);
+
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ acc_wait (0);
+ }
+
+ atime = stop_timer (0);
+
+ hitime = dtime * N;
+ hitime += hitime * 0.02;
+
+ lotime = dtime * N;
+ lotime -= lotime * 0.02;
+ /* The check fails on either side of the +/-2% window, so the message must not claim only "too short". */
+ if (atime > hitime || atime < lotime)
+ {
+ fprintf (stderr, "actual time outside expected range\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c
new file mode 100644
index 0000000..11d9d62
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c
@@ -0,0 +1,147 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ int N;
+ int i;
+ CUstream *streams;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime, hitime, lotime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+ /* Launches the delay kernel once on each of N registered streams, waiting after each launch, and checks the total time is within 2% of dtime * N. */
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+ /* Fetch the "delay" kernel from the module file "subr.ptx". */
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+ /* NOTE(review): presumably dtime is in milliseconds and clkrate in kHz, making dticks a clock-tick count -- confirm. */
+ dticks = (unsigned long) (dtime * clkrate);
+
+ N = nprocs;
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+ /* Size the array by its element type (CUstream) rather than by void *. */
+ streams = (CUstream *) malloc (N * sizeof (CUstream));
+
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = (CUstream) acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+
+ init_timers (1);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ start_timer (0);
+
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ acc_wait (i);
+ }
+
+ atime = stop_timer (0);
+
+ hitime = dtime * N;
+ hitime += hitime * 0.02;
+
+ lotime = dtime * N;
+ lotime -= lotime * 0.02;
+ /* The check fails on either side of the +/-2% window, so the message must not claim only "too short". */
+ if (atime > hitime || atime < lotime)
+ {
+ fprintf (stderr, "actual time outside expected range\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (streams);
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c
new file mode 100644
index 0000000..e47212b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-77.c
@@ -0,0 +1,135 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+ /* Negative test: a stream is registered for async value 0, but acc_wait is called with async value 1. */
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+ /* Fetch the "delay" kernel from the module file "subr.ptx". */
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+ /* NOTE(review): presumably dtime is in milliseconds and clkrate in kHz, making dticks a clock-tick count -- confirm. */
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ acc_set_cuda_stream (0, stream);
+
+ init_timers (1);
+
+ start_timer (0);
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+ /* Async value 1 was never registered above; presumably this call raises the error named by dg-shouldfail after main. */
+ acc_wait (1);
+
+ atime = stop_timer (0);
+
+ if (atime < dtime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ start_timer (0);
+
+ acc_wait (1);
+
+ atime = stop_timer (0);
+ /* This branch detects a wait that took too long, so say so (same wording as the lib-74 and lib-78 tests). */
+ if (0.010 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: unknown async \d" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c
new file mode 100644
index 0000000..4f58fb2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c
@@ -0,0 +1,140 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+ /* Times acc_wait_all on a freshly launched delay kernel, then times a second acc_wait_all issued afterwards. */
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+ /* Fetch the "delay" kernel from the module file "subr.ptx". */
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+ /* NOTE(review): presumably dtime is in milliseconds and clkrate in kHz, making dticks a clock-tick count -- confirm. */
+ dticks = (unsigned long) (dtime * clkrate);
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+ /* No stream may be associated with async value 0 before one is registered below. */
+ stream = (CUstream) acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ init_timers (1);
+
+ start_timer (0);
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ acc_wait_all ();
+
+ atime = stop_timer (0);
+ /* Launch plus wait must take at least dtime (NOTE(review): assumes stop_timer reports the same unit as dtime -- confirm in timer.h). */
+ if (atime < dtime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ start_timer (0);
+
+ acc_wait_all ();
+
+ atime = stop_timer (0);
+ /* The second wait has nothing new launched before it; its measured time must not exceed 0.010. */
+ if (0.010 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c
new file mode 100644
index 0000000..ef3df13
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c
@@ -0,0 +1,167 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+/* Queue N delay kernels on the stream bound to async 0, issue acc_wait_async (0, 1), and check completion state and elapsed time.  */
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ int N;
+ int i;
+ CUstream stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime, hitime, lotime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ /* The CUDA device handle is looked up for the device number that the
+ OpenACC runtime reports; no preset value is needed.  */
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ N = nprocs;
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+ /* Give async 1 a stream of its own.  */
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (1, stream))
+ abort ();
+ /* Async 0 is expected to have no stream yet; it gets a second new one.  */
+ stream = (CUstream) acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ init_timers (1);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ start_timer (0);
+ /* All N launches go to the stream that was just bound to async 0.  */
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+ }
+
+ acc_wait_async (0, 1);
+ /* Before acc_wait (1) neither async value may test as complete.  */
+ if (acc_async_test (0) != 0)
+ abort ();
+
+ if (acc_async_test (1) != 0)
+ abort ();
+
+ acc_wait (1);
+
+ atime = stop_timer (0);
+ /* After acc_wait (1) both async values must test as complete.  */
+ if (acc_async_test (0) != 1)
+ abort ();
+
+ if (acc_async_test (1) != 1)
+ abort ();
+ /* Accept a 2% deviation either way from N back-to-back delays.  */
+ hitime = dtime * N;
+ hitime += hitime * 0.02;
+
+ lotime = dtime * N;
+ lotime -= lotime * 0.02;
+
+ if (atime > hitime || atime < lotime)
+ {
+ fprintf (stderr, "actual time outside expected range\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c
new file mode 100644
index 0000000..0b5ec24
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c
@@ -0,0 +1,132 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+/* Call acc_wait_async with the same value for both arguments; the dg-shouldfail directive after main expects the run to fail.  */
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ CUstream stream;
+ int N;
+ int i;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 200.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ N = nprocs;
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+ /* Bind a freshly created stream to async 1.  */
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ acc_set_cuda_stream (1, stream);
+
+ init_timers (1);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+
+ start_timer (0);
+ /* Queue N delay kernels on that stream.  */
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+ }
+ /* Both arguments are 1 here.  */
+ acc_wait_async (1, 1);
+
+ acc_wait (1);
+
+ atime = stop_timer (0);
+
+ if (atime < dtime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-shouldfail "libgomp: identical parameters" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c
new file mode 100644
index 0000000..d5f18f0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c
@@ -0,0 +1,211 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+#include "timer.h"
+/* Launch one delay kernel on each of N streams bound to async 0..N-1, then use acc_wait_all_async and acc_wait on further async values and check completion state and timing.  */
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay;
+ CUmodule module;
+ CUresult r;
+ int N;
+ int i;
+ CUstream *streams, stream;
+ unsigned long *a, *d_a, dticks;
+ int nbytes;
+ float atime, dtime;
+ void *kargs[2];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay, module, "delay");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+
+ nbytes = nprocs * sizeof (unsigned long);
+
+ dtime = 500.0;
+
+ dticks = (unsigned long) (dtime * clkrate);
+
+ N = nprocs;
+
+ a = (unsigned long *) malloc (nbytes);
+ d_a = (unsigned long *) acc_malloc (nbytes);
+
+ acc_map_data (a, d_a, nbytes);
+
+ streams = (CUstream *) malloc (N * sizeof (void *));
+ /* Bind a new stream to each async 0..N-1; none may have one already.  */
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = (CUstream) acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+
+ init_timers (1);
+
+ kargs[0] = (void *) &d_a;
+ kargs[1] = (void *) &dticks;
+ /* Async N gets its own stream; no kernel is launched on it.  */
+ stream = (CUstream) acc_get_cuda_stream (N);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (N, stream))
+ abort ();
+
+ start_timer (0);
+
+ for (i = 0; i < N; i++)
+ {
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+ }
+
+ acc_wait_all_async (N);
+ /* Until acc_wait (N) returns, async 0..N must all test as not complete.  */
+ for (i = 0; i <= N; i++)
+ {
+ if (acc_async_test (i) != 0)
+ abort ();
+ }
+
+ acc_wait (N);
+ /* Afterwards async 0..N must all test as complete.  */
+ for (i = 0; i <= N; i++)
+ {
+ if (acc_async_test (i) != 1)
+ abort ();
+ }
+
+ atime = stop_timer (0);
+
+ if (atime < dtime)
+ {
+ fprintf (stderr, "actual time < delay time\n");
+ abort ();
+ }
+
+ start_timer (0);
+ /* Repeat the wait on a new async N + 1; the measured time may not exceed 0.10.  */
+ stream = (CUstream) acc_get_cuda_stream (N + 1);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (N + 1, stream))
+ abort ();
+
+ acc_wait_all_async (N + 1);
+
+ acc_wait (N + 1);
+
+ atime = stop_timer (0);
+
+ if (0.10 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ start_timer (0);
+ /* Same check once more on async N.  */
+ acc_wait_all_async (N);
+
+ acc_wait (N);
+
+ atime = stop_timer (0);
+
+ if (0.10 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ acc_unmap_data (a);
+
+ fini_timers ();
+
+ free (streams);
+ free (a);
+ acc_free (d_a);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c
new file mode 100644
index 0000000..be30a7f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c
@@ -0,0 +1,144 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <cuda.h>
+/* Launch the delay2 kernel once per stream on N streams bound to async 0..N-1, then copy each mapped value back and check that it equals its index.  */
+int
+main (int argc, char **argv)
+{
+ CUdevice dev;
+ CUfunction delay2;
+ CUmodule module;
+ CUresult r;
+ int N;
+ int i;
+ CUstream *streams;
+ unsigned long **a, **d_a, *tid, ticks;
+ int nbytes;
+ void *kargs[3];
+ int clkrate;
+ int devnum, nprocs;
+
+ acc_init (acc_device_nvidia);
+
+ devnum = acc_get_device_num (acc_device_nvidia);
+
+ r = cuDeviceGet (&dev, devnum);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ abort ();
+ }
+
+ r =
+ cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
+ dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleLoad (&module, "subr.ptx");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuModuleGetFunction (&delay2, module, "delay2");
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ abort ();
+ }
+ /* Every mapped object is one unsigned long, so size transfers to match.  */
+ nbytes = sizeof (unsigned long);
+
+ ticks = (unsigned long) (200.0 * clkrate);
+
+ N = nprocs;
+
+ streams = (CUstream *) malloc (N * sizeof (void *));
+
+ a = (unsigned long **) malloc (N * sizeof (unsigned long *));
+ d_a = (unsigned long **) malloc (N * sizeof (unsigned long *));
+ tid = (unsigned long *) malloc (N * sizeof (unsigned long));
+ /* Per index: a host value preset to N, its device copy, and a stream.  */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = (unsigned long *) malloc (sizeof (unsigned long));
+ *a[i] = N;
+ d_a[i] = (unsigned long *) acc_malloc (nbytes);
+ tid[i] = i;
+
+ acc_map_data (a[i], d_a[i], nbytes);
+
+ streams[i] = (CUstream) acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+ /* The first launch uses the 200.0 tick factor, all later ones 50.0.  */
+ for (i = 0; i < N; i++)
+ {
+ kargs[0] = (void *) &d_a[i];
+ kargs[1] = (void *) &ticks;
+ kargs[2] = (void *) &tid[i];
+
+ r = cuLaunchKernel (delay2, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ ticks = (unsigned long) (50.0 * clkrate);
+ }
+
+ acc_wait_all_async (0);
+ /* Each value copied back must equal its index.  */
+ for (i = 0; i < N; i++)
+ {
+ acc_copyout (a[i], nbytes);
+ if (*a[i] != i)
+ abort ();
+ }
+
+ free (streams);
+
+ for (i = 0; i < N; i++)
+ {
+ free (a[i]);
+ }
+
+ free (a);
+ free (d_a);
+ free (tid);
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c
new file mode 100644
index 0000000..1c2e52b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-83.c
@@ -0,0 +1,58 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include "timer.h"
+/* Time acc_wait_all_async (0) followed by acc_wait (0) when no kernel has been launched.  */
+int
+main (int argc, char **argv)
+{
+ float atime;
+ CUstream stream;
+ CUresult r;
+
+ acc_init (acc_device_nvidia);
+
+ (void) acc_get_device_num (acc_device_nvidia);
+
+ init_timers (1);
+ /* Async 0 must have no stream yet; create one and bind it.  */
+ stream = (CUstream) acc_get_cuda_stream (0);
+ if (stream != NULL)
+ abort ();
+
+ r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, stream))
+ abort ();
+
+ start_timer (0);
+
+ acc_wait_all_async (0);
+
+ acc_wait (0);
+
+ atime = stop_timer (0);
+ /* The two waits together may not measure more than 0.010.  */
+ if (0.010 < atime)
+ {
+ fprintf (stderr, "actual time too long\n");
+ abort ();
+ }
+
+ fini_timers ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c
new file mode 100644
index 0000000..786b908
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-84.c
@@ -0,0 +1,66 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <openacc.h>
+#include <cuda.h>
+/* Bind a new stream to each of N async values and verify that no two of the created streams are equal.  */
+int
+main (int argc, char **argv)
+{
+ const int N = 100;
+ int i;
+ CUstream *streams;
+ CUstream s;
+ CUresult r;
+
+ acc_init (acc_device_nvidia);
+
+ (void) acc_get_device_num (acc_device_nvidia);
+
+ streams = (CUstream *) malloc (N * sizeof (void *));
+ /* Each async value must start without a stream.  */
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = (CUstream) acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+ /* Every stream must occur exactly once in the table.  */
+ for (i = 0; i < N; i++)
+ {
+ int j;
+ int cnt;
+
+ cnt = 0;
+
+ s = streams[i];
+
+ for (j = 0; j < N; j++)
+ {
+ if (s == streams[j])
+ cnt++;
+ }
+
+ if (cnt != 1)
+ abort ();
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c
new file mode 100644
index 0000000..cf925a7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-85.c
@@ -0,0 +1,52 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <stdio.h>
+#include <cuda.h>
+/* After binding streams to async 0..N-1, check that acc_set_cuda_stream returns 0 when handed a NULL stream.  */
+int
+main (int argc, char **argv)
+{
+ const int N = 100;
+ int i;
+ CUstream *streams;
+ CUstream s;
+ CUresult r;
+
+ acc_init (acc_device_nvidia);
+
+ (void) acc_get_device_num (acc_device_nvidia);
+
+ streams = (CUstream *) malloc (N * sizeof (void *));
+ /* Each async value must start without a stream.  */
+ for (i = 0; i < N; i++)
+ {
+ streams[i] = (CUstream) acc_get_cuda_stream (i);
+ if (streams[i] != NULL)
+ abort ();
+
+ r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (i, streams[i]))
+ abort ();
+ }
+
+ s = NULL;
+ /* Any nonzero return for the NULL stream is treated as a failure.  */
+ if (acc_set_cuda_stream (N + 1, s) != 0)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ exit (0);
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-86.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-86.c
new file mode 100644
index 0000000..b8a8ee9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-86.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+/* Check that acc_get_current_cuda_device returns 0 except between acc_init (acc_device_nvidia) and the matching acc_shutdown.  */
+int
+main (int argc, char **argv)
+{
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+ /* Nothing has been initialized yet.  */
+ if (acc_get_current_cuda_device () != 0)
+ abort ();
+
+ acc_init (acc_device_host);
+ /* Still 0 while only the host device type is initialized.  */
+ if (acc_get_current_cuda_device () != 0)
+ abort ();
+
+ acc_shutdown (acc_device_host);
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ if (acc_get_current_cuda_device () != 0)
+ abort ();
+
+ acc_init (acc_device_nvidia);
+ /* The only point at which a nonzero value is required.  */
+ if (acc_get_current_cuda_device () == 0)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ if (acc_get_current_cuda_device () != 0)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-87.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-87.c
new file mode 100644
index 0000000..147d443
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-87.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+/* Check that acc_get_current_cuda_context returns 0 except between acc_init (acc_device_nvidia) and the matching acc_shutdown.  */
+int
+main (int argc, char **argv)
+{
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+ /* Nothing has been initialized yet.  */
+ if (acc_get_current_cuda_context () != 0)
+ abort ();
+
+ acc_init (acc_device_host);
+ /* Still 0 while only the host device type is initialized.  */
+ if (acc_get_current_cuda_context () != 0)
+ abort ();
+
+ acc_shutdown (acc_device_host);
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ if (acc_get_current_cuda_context () != 0)
+ abort ();
+
+ acc_init (acc_device_nvidia);
+ /* The only point at which a nonzero value is required.  */
+ if (acc_get_current_cuda_context () == 0)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ if (acc_get_current_cuda_context () != 0)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-88.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-88.c
new file mode 100644
index 0000000..10f4ad8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-88.c
@@ -0,0 +1,111 @@
+/* { dg-do run } */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <openacc.h>
+
+unsigned char *x;
+void *d_x;
+const int N = 256;
+/* Thread body: expects a NULL current CUDA context and x to be present, copies x out, checks x[i] == i, refills x with N - i - 1 and copies it in again.  ARG is unused.  */
+static void *
+test (void *arg)
+{
+ int i;
+
+ if (acc_get_current_cuda_context () != NULL)
+ abort ();
+
+ if (acc_is_present (x, N) != 1)
+ abort ();
+ /* Clear the host copy so the checks below depend on the copyout.  */
+ memset (x, 0, N);
+
+ acc_copyout (x, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (x[i] != i)
+ abort ();
+
+ x[i] = N - i - 1;
+ }
+
+ d_x = acc_copyin (x, N);
+
+ return 0;
+}
+/* Copy x to the device, let a single thread run test on it, then copy x out and verify the values that thread stored.  */
+int
+main (int argc, char **argv)
+{
+ const int nthreads = 1;
+ int i;
+ pthread_attr_t attr;
+ pthread_t *tid;
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ acc_init (acc_device_nvidia);
+
+ x = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ x[i] = i;
+ }
+
+ d_x = acc_copyin (x, N);
+
+ if (acc_is_present (x, N) != 1)
+ abort ();
+ /* pthread failures are only reported with perror; execution continues.  */
+ if (pthread_attr_init (&attr) != 0)
+ perror ("pthread_attr_init failed");
+
+ tid = (pthread_t *) malloc (nthreads * sizeof (pthread_t));
+
+ for (i = 0; i < nthreads; i++)
+ {
+ if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+ != 0)
+ perror ("pthread_create failed");
+ }
+
+ if (pthread_attr_destroy (&attr) != 0)
+ perror ("pthread_attr_destroy failed");
+
+ for (i = 0; i < nthreads; i++)
+ {
+ void *res;
+
+ if (pthread_join (tid[i], &res) != 0)
+ perror ("pthread join failed");
+ }
+ /* The thread copied x in again, so it must still be present here.  */
+ if (acc_is_present (x, N) != 1)
+ abort ();
+
+ memset (x, 0, N);
+
+ acc_copyout (x, N);
+
+ for (i = 0; i < N; i++)
+ {
+ if (x[i] != N - i - 1)
+ abort ();
+ }
+
+ if (acc_is_present (x, N) != 0)
+ abort ();
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-89.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-89.c
new file mode 100644
index 0000000..061c409
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-89.c
@@ -0,0 +1,118 @@
+/* { dg-do run } */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <ctype.h>
+#include <openacc.h>
+
+unsigned char **x;
+void **d_x;
+const int N = 16;
+const int NTHREADS = 32;
+/* Thread body: fill a new N-byte buffer with the thread index passed in ARG, record it in x[tid] and copy it to the device, storing the result in d_x[tid].  */
+static void *
+test (void *arg)
+{
+ int i;
+ int tid;
+ unsigned char *p;
+ int devnum;
+
+ tid = (int) (long) arg;
+ /* Set the device number to the one acc_get_device_num just returned.  */
+ devnum = acc_get_device_num (acc_device_nvidia);
+ acc_set_device_num (devnum, acc_device_nvidia);
+
+ if (acc_get_current_cuda_context () == NULL)
+ abort ();
+
+ p = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ p[i] = tid;
+ }
+
+ x[tid] = p;
+
+ d_x[tid] = acc_copyin (p, N);
+
+ return 0;
+}
+/* Run NTHREADS threads that each copy in their own buffer via test, then check presence, copy every buffer out and verify its contents.  */
+int
+main (int argc, char **argv)
+{
+ int i;
+ pthread_attr_t attr;
+ pthread_t *tid;
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ acc_init (acc_device_nvidia);
+ /* One pointer slot per thread, indexed by thread number.  */
+ x = (unsigned char **) malloc (NTHREADS * sizeof (unsigned char *));
+ d_x = (void **) malloc (NTHREADS * sizeof (void *));
+
+ if (pthread_attr_init (&attr) != 0)
+ perror ("pthread_attr_init failed");
+
+ tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t));
+ /* The loop index is handed to each thread as its argument.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+ != 0)
+ perror ("pthread_create failed");
+ }
+
+ if (pthread_attr_destroy (&attr) != 0)
+ perror ("pthread_attr_destroy failed");
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ void *res;
+
+ if (pthread_join (tid[i], &res) != 0)
+ perror ("pthread join failed");
+ }
+ /* Every buffer copied in by a thread must be present for this thread.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (acc_is_present (x[i], N) != 1)
+ abort ();
+ }
+ /* Clear the host copies so the checks below depend on the copyout.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ memset (x[i], 0, N);
+ acc_copyout (x[i], N);
+ }
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ unsigned char *p;
+ int j;
+
+ p = x[i];
+
+ for (j = 0; j < N; j++)
+ {
+ if (p[j] != i)
+ abort ();
+ }
+
+ if (acc_is_present (x[i], N) != 0)
+ abort ();
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-9.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-9.c
new file mode 100644
index 0000000..84045db
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-9.c
@@ -0,0 +1,70 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <openacc.h>
+/* Cycle through every device number with acc_set_device_num and acc_get_device_num, before and after acc_shutdown, then check that a call with device type 0 leaves the device number at 0.  */
+int
+main (int argc, char **argv)
+{
+ int i;
+ int num_devices;
+ int devnum;
+ acc_device_t devtype = acc_device_host;
+
+#if ACC_DEVICE_TYPE_nvidia
+ devtype = acc_device_nvidia;
+#endif
+
+ num_devices = acc_get_num_devices (devtype);
+ if (num_devices == 0)
+ return 0;
+
+ acc_init (devtype);
+ /* Each device number set must be the one read back.  */
+ for (i = 0; i < num_devices; i++)
+ {
+ acc_set_device_num (i, devtype);
+ devnum = acc_get_device_num (devtype);
+ if (devnum != i)
+ abort ();
+ }
+
+ acc_shutdown (devtype);
+ /* Same round trip again without an explicit acc_init.  */
+ num_devices = acc_get_num_devices (devtype);
+ if (num_devices == 0)
+ abort ();
+
+ for (i = 0; i < num_devices; i++)
+ {
+ acc_set_device_num (i, devtype);
+ devnum = acc_get_device_num (devtype);
+ if (devnum != i)
+ abort ();
+ }
+
+ acc_shutdown (devtype);
+
+ acc_init (devtype);
+
+ acc_set_device_num (0, devtype);
+
+ devnum = acc_get_device_num (devtype);
+ if (devnum != 0)
+ abort ();
+ /* A set with device type 0 must not change devtype's device number.  */
+ if (num_devices > 1)
+ {
+ acc_set_device_num (1, (acc_device_t) 0);
+
+ devnum = acc_get_device_num (devtype);
+ if (devnum != 0)
+ abort ();
+ }
+
+ acc_shutdown (devtype);
+
+ return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-90.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-90.c
new file mode 100644
index 0000000..d17755b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-90.c
@@ -0,0 +1,137 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <ctype.h>
+#include <openacc.h>
+#include <cuda.h>
+
+unsigned char **x;
+void **d_x;
+const int N = 16;
+const int NTHREADS = 32;
+/* Thread body: fill a new N-byte buffer with the thread index passed in ARG, record it in x[tid], copy it to the device and call acc_wait_all.  */
+static void *
+test (void *arg)
+{
+ int i;
+ int tid;
+ unsigned char *p;
+ int devnum;
+
+ tid = (int) (long) arg;
+ /* Set the device number to the one acc_get_device_num just returned.  */
+ devnum = acc_get_device_num (acc_device_nvidia);
+ acc_set_device_num (devnum, acc_device_nvidia);
+
+ if (acc_get_current_cuda_context () == NULL)
+ abort ();
+
+ p = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ p[i] = tid;
+ }
+
+ x[tid] = p;
+
+ d_x[tid] = acc_copyin (p, N);
+
+ acc_wait_all ();
+
+ return 0;
+}
+/* Bind a stream to async 0, run NTHREADS threads that each copy in their own buffer via test, then check presence, copy every buffer out and verify its contents.  */
+int
+main (int argc, char **argv)
+{
+ int i;
+ pthread_attr_t attr;
+ pthread_t *tid;
+ CUresult r;
+ CUstream s;
+
+ acc_init (acc_device_nvidia);
+ /* One pointer slot per thread, indexed by thread number.  */
+ x = (unsigned char **) malloc (NTHREADS * sizeof (unsigned char *));
+ d_x = (void **) malloc (NTHREADS * sizeof (void *));
+
+ if (pthread_attr_init (&attr) != 0)
+ perror ("pthread_attr_init failed");
+
+ tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t));
+
+ r = cuStreamCreate (&s, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, s))
+ abort ();
+ /* The loop index is handed to each thread as its argument.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+ != 0)
+ perror ("pthread_create failed");
+ }
+
+ if (pthread_attr_destroy (&attr) != 0)
+ perror ("pthread_attr_destroy failed");
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ void *res;
+
+ if (pthread_join (tid[i], &res) != 0)
+ perror ("pthread join failed");
+ }
+
+ /* Every buffer copied in by a thread must be present for this thread.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (acc_is_present (x[i], N) != 1)
+ abort ();
+ }
+
+ acc_get_cuda_stream (1);
+ /* Clear the host copies so the checks below depend on the copyout.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ memset (x[i], 0, N);
+ acc_copyout (x[i], N);
+ }
+
+ acc_wait_all ();
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ unsigned char *p;
+ int j;
+
+ p = x[i];
+
+ for (j = 0; j < N; j++)
+ {
+ if (p[j] != i)
+ abort ();
+ }
+
+ if (acc_is_present (x[i], N) != 0)
+ abort ();
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-91.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-91.c
new file mode 100644
index 0000000..e00ef4f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-91.c
@@ -0,0 +1,84 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-lcuda" } */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <openacc.h>
+#include <sys/time.h>
+#include <stdio.h>
+#include <cuda.h>
+/* Time 100 synchronous "acc update device" transfers against 100 issued with async(0); abort if the async run takes more than 1.5 times as long.  */
+int
+main (int argc, char **argv)
+{
+ const int N = 1024 * 1024;
+ int i;
+ unsigned char *h;
+ void *d;
+ float async, sync;
+ struct timeval start, stop;
+ CUresult r;
+ CUstream s;
+
+ acc_init (acc_device_nvidia);
+
+ h = (unsigned char *) malloc (N);
+
+ for (i = 0; i < N; i++)
+ {
+ h[i] = i;
+ }
+
+ d = acc_malloc (N);
+
+ acc_map_data (h, d, N);
+ /* First measurement: updates without an async clause.  */
+ gettimeofday (&start, NULL);
+
+ for (i = 0; i < 100; i++)
+ {
+#pragma acc update device(h[0:N])
+ }
+
+ gettimeofday (&stop, NULL);
+
+ sync = (float) (stop.tv_sec - start.tv_sec);
+ sync += (float) ((stop.tv_usec - start.tv_usec) / 1000000.0);
+ /* Second measurement; the stream set-up falls inside the timed region.  */
+ gettimeofday (&start, NULL);
+
+ r = cuStreamCreate (&s, CU_STREAM_DEFAULT);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ if (!acc_set_cuda_stream (0, s))
+ abort ();
+
+ for (i = 0; i < 100; i++)
+ {
+#pragma acc update device(h[0:N]) async(0)
+ }
+
+ acc_wait_all ();
+
+ gettimeofday (&stop, NULL);
+
+ async = (float) (stop.tv_sec - start.tv_sec);
+ async += (float) ((stop.tv_usec - start.tv_usec) / 1000000.0);
+
+ if (async > (sync * 1.5))
+ abort ();
+ /* NOTE(review): h is never passed to acc_unmap_data before acc_free (d) -- confirm this is intended.  */
+ acc_free (d);
+
+ free (h);
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-92.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-92.c
new file mode 100644
index 0000000..18193e0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-92.c
@@ -0,0 +1,112 @@
+/* { dg-do run } */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <ctype.h>
+#include <openacc.h>
+
+unsigned char **x;
+void **d_x;
+const int N = 32;
+const int NTHREADS = 32;
+/* Thread body: copy x[tid] out of the device, where tid is the index passed in ARG, and check that the buffer holds 0..N-1.  */
+static void *
+test (void *arg)
+{
+ int i;
+ int tid;
+ unsigned char *p;
+ int devnum;
+
+ tid = (int) (long) arg;
+ /* Set the device number to the one acc_get_device_num just returned.  */
+ devnum = acc_get_device_num (acc_device_nvidia);
+ acc_set_device_num (devnum, acc_device_nvidia);
+
+ if (acc_get_current_cuda_context () == NULL)
+ abort ();
+
+ acc_copyout (x[tid], N);
+
+ p = x[tid];
+
+ for (i = 0; i < N; i++)
+ {
+ if (p[i] != i)
+ abort ();
+ }
+
+ return 0;
+}
+/* Copy in one buffer per thread, let NTHREADS threads copy their buffer out via test, and check that no buffer is present afterwards.  */
+int
+main (int argc, char **argv)
+{
+ int i;
+ pthread_attr_t attr;
+ pthread_t *tid;
+ unsigned char *p;
+
+ if (acc_get_num_devices (acc_device_nvidia) == 0)
+ return 0;
+
+ acc_init (acc_device_nvidia);
+ /* One pointer slot per thread, indexed by thread number.  */
+ x = (unsigned char **) malloc (NTHREADS * sizeof (unsigned char *));
+ d_x = (void **) malloc (NTHREADS * sizeof (void *));
+ /* Prepare a buffer holding 0..N-1 for every thread.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ int j;
+
+ p = (unsigned char *) malloc (N);
+
+ x[i] = p;
+
+ for (j = 0; j < N; j++)
+ {
+ p[j] = j;
+ }
+
+ d_x[i] = acc_copyin (p, N);
+ }
+
+ if (pthread_attr_init (&attr) != 0)
+ perror ("pthread_attr_init failed");
+
+ tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t));
+
+ acc_get_cuda_stream (1);
+ /* The loop index is handed to each thread as its argument.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i))
+ != 0)
+ perror ("pthread_create failed");
+ }
+
+ if (pthread_attr_destroy (&attr) != 0)
+ perror ("pthread_attr_destroy failed");
+
+ for (i = 0; i < NTHREADS; i++)
+ {
+ void *res;
+
+ if (pthread_join (tid[i], &res) != 0)
+ perror ("pthread join failed");
+ }
+ /* The threads copied every buffer out, so none may still be present.  */
+ for (i = 0; i < NTHREADS; i++)
+ {
+ if (acc_is_present (x[i], N) != 0)
+ abort ();
+ }
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-output "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/nested-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/nested-1.c
new file mode 100644
index 0000000..ededf2b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/nested-1.c
@@ -0,0 +1,506 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+/* Data clauses on a data region that encloses a parallel region, mixed with
+   mappings created through the OpenACC runtime API.  Every step verifies the
+   host-side values and whether each array is still mapped afterwards.  */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+/* Store VAL in each of the N elements of V.  */
+
+static void
+set_all (float *v, int n, double val)
+{
+  int k;
+
+  for (k = 0; k < n; k++)
+    v[k] = val;
+}
+
+/* Abort unless each of the N elements of V compares equal to VAL.  */
+
+static void
+check_all (const float *v, int n, double val)
+{
+  int k;
+
+  for (k = 0; k < n; k++)
+    if (v[k] != val)
+      abort ();
+}
+
+/* Abort unless acc_is_present reports the SIZE bytes at P as present.  */
+
+static void
+must_be_present (void *p, size_t size)
+{
+  if (!acc_is_present (p, size))
+    abort ();
+}
+
+/* Abort if acc_is_present reports the SIZE bytes at P as present.  */
+
+static void
+must_be_absent (void *p, size_t size)
+{
+  if (acc_is_present (p, size))
+    abort ();
+}
+
+int
+main (int argc, char **argv)
+{
+  int N = 8;
+  float *a, *b, *c, *d;
+  size_t bytes = N * sizeof (float);
+
+  a = (float *) malloc (bytes);
+  b = (float *) malloc (bytes);
+  c = (float *) malloc (bytes);
+
+  /* copyin + copyout: B receives A's values and nothing stays mapped.  */
+  set_all (a, N, 3.0);
+  set_all (b, N, 0.0);
+
+#pragma acc data copyin (a[0:N]) copyout (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        b[k] = a[k];
+    }
+  }
+
+  check_all (b, N, 3.0);
+  must_be_absent (a, bytes);
+  must_be_absent (b, bytes);
+
+  /* The same region again with different values.  */
+  set_all (a, N, 5.0);
+  set_all (b, N, 1.0);
+
+#pragma acc data copyin (a[0:N]) copyout (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        b[k] = a[k];
+    }
+  }
+
+  check_all (b, N, 5.0);
+  must_be_absent (a, bytes);
+  must_be_absent (b, bytes);
+
+  /* A is mapped up front with acc_copyin and then overwritten on the host;
+     present_or_copyin must reuse the device copy (6.0), not the new 9.0.  */
+  set_all (a, N, 6.0);
+  set_all (b, N, 0.0);
+
+  d = (float *) acc_copyin (a, bytes);
+
+  set_all (a, N, 9.0);
+
+#pragma acc data present_or_copyin (a[0:N]) copyout (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        b[k] = a[k];
+    }
+  }
+
+  check_all (b, N, 6.0);
+  must_be_present (a, bytes);
+  must_be_absent (b, bytes);
+
+  acc_free (d);
+
+  /* present_or_copyout on an unmapped B behaves like copyout.  */
+  set_all (a, N, 6.0);
+  set_all (b, N, 0.0);
+
+#pragma acc data copyin (a[0:N]) present_or_copyout (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        b[k] = a[k];
+    }
+  }
+
+  check_all (b, N, 6.0);
+  must_be_absent (a, bytes);
+  must_be_absent (b, bytes);
+
+  /* B is already mapped via acc_copyin, so present_or_copyout must not copy
+     it back: the host keeps 2.0 and B stays mapped until acc_free.  */
+  set_all (a, N, 5.0);
+  set_all (b, N, 2.0);
+
+  d = (float *) acc_copyin (b, bytes);
+
+#pragma acc data copyin (a[0:N]) present_or_copyout (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        b[k] = a[k];
+    }
+  }
+
+  check_all (a, N, 5.0);
+  check_all (b, N, 2.0);
+  must_be_absent (a, bytes);
+  must_be_present (b, bytes);
+
+  acc_free (d);
+
+  must_be_absent (b, bytes);
+
+  /* copy: A is transferred in both directions.  */
+  set_all (a, N, 3.0);
+  set_all (b, N, 4.0);
+
+#pragma acc data copy (a[0:N]) copyout (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        {
+          a[k] = a[k] + 1;
+          b[k] = a[k] + 2;
+        }
+    }
+  }
+
+  check_all (a, N, 4.0);
+  check_all (b, N, 6.0);
+  must_be_absent (a, bytes);
+  must_be_absent (b, bytes);
+
+  /* present_or_copy on unmapped arrays behaves like copy.  */
+  set_all (a, N, 4.0);
+  set_all (b, N, 7.0);
+
+#pragma acc data present_or_copy (a[0:N]) present_or_copy (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        {
+          a[k] = a[k] + 1;
+          b[k] = b[k] + 2;
+        }
+    }
+  }
+
+  check_all (a, N, 5.0);
+  check_all (b, N, 9.0);
+  must_be_absent (a, bytes);
+  must_be_absent (b, bytes);
+
+  /* Both arrays are mapped up front, so present_or_copy transfers nothing:
+     the host values are unchanged and both arrays stay mapped.  */
+  set_all (a, N, 3.0);
+  set_all (b, N, 7.0);
+
+  d = (float *) acc_copyin (a, bytes);
+  d = (float *) acc_copyin (b, bytes);
+
+#pragma acc data present_or_copy (a[0:N]) present_or_copy (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        {
+          a[k] = a[k] + 1;
+          b[k] = b[k] + 2;
+        }
+    }
+  }
+
+  check_all (a, N, 3.0);
+  check_all (b, N, 7.0);
+  must_be_present (a, bytes);
+  must_be_present (b, bytes);
+
+  d = (float *) acc_deviceptr (a);
+  acc_unmap_data (a);
+  acc_free (d);
+
+  d = (float *) acc_deviceptr (b);
+  acc_unmap_data (b);
+  acc_free (d);
+
+  /* create: C exists only on the device for the duration of the region.  */
+  set_all (a, N, 3.0);
+  set_all (b, N, 7.0);
+
+#pragma acc data copyin (a[0:N]) create (c[0:N]) copyout (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        {
+          c[k] = a[k];
+          b[k] = c[k];
+        }
+    }
+  }
+
+  check_all (a, N, 3.0);
+  check_all (b, N, 3.0);
+  must_be_absent (a, bytes);
+  must_be_absent (b, bytes);
+  must_be_absent (c, bytes);
+
+  /* present_or_create on an unmapped C behaves like create.  */
+  set_all (a, N, 4.0);
+  set_all (b, N, 8.0);
+
+#pragma acc data copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        {
+          c[k] = a[k];
+          b[k] = c[k];
+        }
+    }
+  }
+
+  check_all (a, N, 4.0);
+  check_all (b, N, 4.0);
+  must_be_absent (a, bytes);
+  must_be_absent (b, bytes);
+  must_be_absent (c, bytes);
+
+  /* present_or_create on a C mapped with acc_map_data leaves it mapped.  */
+  set_all (a, N, 2.0);
+  set_all (b, N, 5.0);
+
+  d = (float *) acc_malloc (bytes);
+  acc_map_data (c, d, bytes);
+
+#pragma acc data copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        {
+          c[k] = a[k];
+          b[k] = c[k];
+        }
+    }
+  }
+
+  check_all (a, N, 2.0);
+  check_all (b, N, 2.0);
+  must_be_absent (a, bytes);
+  must_be_absent (b, bytes);
+  must_be_present (c, bytes);
+
+  d = (float *) acc_deviceptr (c);
+  acc_unmap_data (c);
+  acc_free (d);
+
+  /* present on a C mapped with acc_map_data.  */
+  set_all (a, N, 4.0);
+  set_all (b, N, 8.0);
+
+  d = (float *) acc_malloc (bytes);
+  acc_map_data (c, d, bytes);
+
+#pragma acc data copyin (a[0:N]) present (c[0:N]) copyout (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        {
+          c[k] = a[k];
+          b[k] = c[k];
+        }
+    }
+  }
+
+  check_all (a, N, 4.0);
+  check_all (b, N, 4.0);
+  must_be_absent (a, bytes);
+  must_be_absent (b, bytes);
+  must_be_present (c, bytes);
+
+  acc_unmap_data (c);
+  must_be_absent (c, bytes);
+  acc_free (d);
+
+  /* Map all three arrays by hand; a region that only uses present clauses
+     must leave the host copies alone until B is copied out explicitly.  */
+  d = (float *) acc_malloc (bytes);
+  acc_map_data (c, d, bytes);
+  must_be_present (c, bytes);
+
+  d = (float *) acc_malloc (bytes);
+  acc_map_data (b, d, bytes);
+  must_be_present (b, bytes);
+
+  d = (float *) acc_malloc (bytes);
+  acc_map_data (a, d, bytes);
+  must_be_present (a, bytes);
+
+#pragma acc data present (a[0:N]) present (c[0:N]) present (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        {
+          a[k] = 1.0;
+          c[k] = 2.0;
+          b[k] = 4.0;
+        }
+    }
+  }
+
+  must_be_present (a, bytes);
+  must_be_present (b, bytes);
+  must_be_present (c, bytes);
+
+  acc_copyout (b, bytes);
+
+  check_all (a, N, 4.0);
+  check_all (b, N, 4.0);
+
+  d = (float *) acc_deviceptr (a);
+  acc_unmap_data (a);
+  acc_free (d);
+
+  d = (float *) acc_deviceptr (c);
+  acc_unmap_data (c);
+  acc_free (d);
+
+  /* deviceptr: D is raw device memory that is never mapped.  */
+  set_all (a, N, 3.0);
+  set_all (b, N, 6.0);
+
+  d = (float *) acc_malloc (bytes);
+
+#pragma acc parallel copyin (a[0:N]) deviceptr (d) copyout (b[0:N])
+  {
+    int k;
+
+    for (k = 0; k < N; k++)
+      {
+        d[k] = a[k];
+        b[k] = d[k];
+      }
+  }
+
+  check_all (a, N, 3.0);
+  check_all (b, N, 3.0);
+  must_be_absent (a, bytes);
+  must_be_absent (b, bytes);
+
+  acc_free (d);
+
+  /* pcopyin, the short spelling of present_or_copyin.  */
+  set_all (a, N, 6.0);
+  set_all (b, N, 0.0);
+
+  d = (float *) acc_copyin (a, bytes);
+
+  set_all (a, N, 9.0);
+
+#pragma acc data pcopyin (a[0:N]) copyout (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        b[k] = a[k];
+    }
+  }
+
+  check_all (b, N, 6.0);
+  must_be_present (a, bytes);
+  must_be_absent (b, bytes);
+
+  acc_free (d);
+
+  /* pcopyout, the short spelling of present_or_copyout.  */
+  set_all (a, N, 6.0);
+  set_all (b, N, 0.0);
+
+#pragma acc data copyin (a[0:N]) pcopyout (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        b[k] = a[k];
+    }
+  }
+
+  check_all (b, N, 6.0);
+  must_be_absent (a, bytes);
+  must_be_absent (b, bytes);
+
+  /* pcreate, the short spelling of present_or_create.  */
+  set_all (a, N, 5.0);
+  set_all (b, N, 7.0);
+
+#pragma acc data copyin (a[0:N]) pcreate (c[0:N]) copyout (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        {
+          c[k] = a[k];
+          b[k] = c[k];
+        }
+    }
+  }
+
+  check_all (a, N, 5.0);
+  check_all (b, N, 5.0);
+  must_be_absent (a, bytes);
+  must_be_absent (b, bytes);
+  must_be_absent (c, bytes);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/nested-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/nested-2.c
new file mode 100644
index 0000000..c164598
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/nested-2.c
@@ -0,0 +1,150 @@
+/* { dg-do run } */
+
+/* A `present' clause on a compute or data construct nested inside a region
+   (or following an `enter data') that has already mapped the array.  */
+
+#include <stdlib.h>
+
+#define N 10
+
+/* Zero all N elements of A.  */
+
+static void
+reset (char *a)
+{
+  int i;
+
+  for (i = 0; i < N; ++i)
+    a[i] = 0;
+}
+
+/* Abort unless A[i] == i for every one of the N elements.  */
+
+static void
+verify (const char *a)
+{
+  int i;
+
+  for (i = 0; i < N; ++i)
+    if (a[i] != i)
+      abort ();
+}
+
+int
+main (int argc, char *argv[])
+{
+  char a[N];
+
+  /* parallel nested in a data region.  */
+  reset (a);
+
+#pragma acc data copy (a)
+  {
+#pragma acc parallel present (a)
+    {
+      int j;
+
+      for (j = 0; j < N; ++j)
+        a[j] = j;
+    }
+  }
+
+  verify (a);
+
+  /* kernels nested in a data region.  */
+  reset (a);
+
+#pragma acc data copy (a)
+  {
+#pragma acc kernels present (a)
+    {
+      int j;
+
+      for (j = 0; j < N; ++j)
+        a[j] = j;
+    }
+  }
+
+  verify (a);
+
+  /* parallel nested in two data regions.  */
+  reset (a);
+
+#pragma acc data copy (a)
+  {
+#pragma acc data present (a)
+    {
+#pragma acc parallel present (a)
+      {
+        int j;
+
+        for (j = 0; j < N; ++j)
+          a[j] = j;
+      }
+    }
+  }
+
+  verify (a);
+
+  /* kernels nested in two data regions.  A is cleared first so that the
+     values left by the previous step cannot satisfy the check.  */
+  reset (a);
+
+#pragma acc data copy (a)
+  {
+#pragma acc data present (a)
+    {
+#pragma acc kernels present (a)
+      {
+        int j;
+
+        for (j = 0; j < N; ++j)
+          a[j] = j;
+      }
+    }
+  }
+
+  verify (a);
+
+  /* parallel after `enter data'.  */
+  reset (a);
+
+#pragma acc enter data copyin (a)
+
+#pragma acc data present (a)
+  {
+#pragma acc parallel present (a)
+    {
+      int j;
+
+      for (j = 0; j < N; ++j)
+        a[j] = j;
+    }
+  }
+
+#pragma acc exit data copyout (a)
+
+  verify (a);
+
+  /* kernels after `enter data'; again starting from a cleared array.  */
+  reset (a);
+
+#pragma acc enter data copyin (a)
+
+#pragma acc data present (a)
+  {
+#pragma acc kernels present (a)
+    {
+      int j;
+
+      for (j = 0; j < N; ++j)
+        a[j] = j;
+    }
+  }
+
+#pragma acc exit data copyout (a)
+
+  verify (a);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/offset-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/offset-1.c
new file mode 100644
index 0000000..0bae23a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/offset-1.c
@@ -0,0 +1,94 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+/* copyin/copyout of array sections, including sections that do not start at
+   element 0 and input/output sections at different offsets.  */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+/* Store VAL in each of the N elements of V.  */
+
+static void
+set_all (float *v, int n, double val)
+{
+  int k;
+
+  for (k = 0; k < n; k++)
+    v[k] = val;
+}
+
+/* Abort unless V[FIRST] .. V[LAST - 1] all compare equal to VAL.  */
+
+static void
+check_range (const float *v, int first, int last, double val)
+{
+  int k;
+
+  for (k = first; k < last; k++)
+    if (v[k] != val)
+      abort ();
+}
+
+int
+main (int argc, char **argv)
+{
+  int N = 8;
+  float *a, *b;
+  size_t bytes = N * sizeof (float);
+
+  a = (float *) malloc (bytes);
+  b = (float *) malloc (bytes);
+
+  /* Sections starting at element 2; the region touches elements 2 and 3.  */
+  set_all (a, N, 2.0);
+  set_all (b, N, 5.0);
+
+#pragma acc parallel copyin(a[2:4]) copyout(b[2:4])
+  {
+    b[2] = a[2];
+    b[3] = a[3];
+  }
+
+  check_range (a, 2, 4, 2.0);
+  check_range (b, 2, 4, 2.0);
+
+  /* Sections starting at element 0.  */
+  set_all (a, N, 3.0);
+  set_all (b, N, 1.0);
+
+#pragma acc parallel copyin(a[0:4]) copyout(b[0:4])
+  {
+    b[0] = a[0];
+    b[1] = a[1];
+    b[2] = a[2];
+    b[3] = a[3];
+  }
+
+  check_range (a, 0, 4, 3.0);
+  check_range (b, 0, 4, 3.0);
+
+  /* Input from a[0:4], output to b[4:4].  */
+  set_all (a, N, 9.0);
+  set_all (b, N, 6.0);
+
+#pragma acc parallel copyin(a[0:4]) copyout(b[4:4])
+  {
+    b[4] = a[0];
+    b[5] = a[1];
+    b[6] = a[2];
+    b[7] = a[3];
+  }
+
+  check_range (a, 0, 4, 9.0);
+  check_range (b, 4, 8, 9.0);
+
+  /* Neither array may still be mapped.  */
+  if (acc_is_present (a, bytes) || acc_is_present (b, bytes))
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-1.c
new file mode 100644
index 0000000..fd9df33
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-1.c
@@ -0,0 +1,195 @@
+/* { dg-do run } */
+
+/* Scalar data clauses on a parallel construct.  Each step checks what the
+   region sees on entry (where the clause copies data in) and what the host
+   sees afterwards.  */
+
+#include <stdlib.h>
+
+int i;
+
+/* Host values of I and J after a region whose writes to them are not
+   copied back; with shared memory the writes are visible anyway.  */
+#if ACC_MEM_SHARED
+# define UNCOPIED_I 2
+# define UNCOPIED_J 1
+#else
+# define UNCOPIED_I (-1)
+# define UNCOPIED_J (-2)
+#endif
+
+/* Abort unless V is 1 and the host sees I == WANT_I and J == WANT_J.
+   I is the global; V and J are main's locals, passed by value.  */
+
+static void
+expect (int v, int j, int want_i, int want_j)
+{
+  if (v != 1 || i != want_i || j != want_j)
+    abort ();
+}
+
+int
+main (void)
+{
+  int j, v;
+
+  /* copyin: the region's writes to I and J are not copied back.  */
+  v = 0;
+  i = -1;
+  j = -2;
+#pragma acc parallel /* copyout */ present_or_copyout (v) copyin (i, j)
+  {
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  expect (v, j, UNCOPIED_I, UNCOPIED_J);
+
+  /* copyout.  */
+  v = 0;
+  i = -1;
+  j = -2;
+#pragma acc parallel /* copyout */ present_or_copyout (v) copyout (i, j)
+  {
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  expect (v, j, 2, 1);
+
+  /* copy.  */
+  v = 0;
+  i = -1;
+  j = -2;
+#pragma acc parallel /* copyout */ present_or_copyout (v) copy (i, j)
+  {
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  expect (v, j, 2, 1);
+
+  /* create.  */
+  v = 0;
+  i = -1;
+  j = -2;
+#pragma acc parallel /* copyout */ present_or_copyout (v) create (i, j)
+  {
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  expect (v, j, UNCOPIED_I, UNCOPIED_J);
+
+  /* present_or_copyin.  */
+  v = 0;
+  i = -1;
+  j = -2;
+#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
+  {
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  expect (v, j, UNCOPIED_I, UNCOPIED_J);
+
+  /* present_or_copyout.  */
+  v = 0;
+  i = -1;
+  j = -2;
+#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
+  {
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  expect (v, j, 2, 1);
+
+  /* present_or_copy.  */
+  v = 0;
+  i = -1;
+  j = -2;
+#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copy (i, j)
+  {
+    if (i != -1 || j != -2)
+      abort ();
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  expect (v, j, 2, 1);
+
+  /* present_or_create.  */
+  v = 0;
+  i = -1;
+  j = -2;
+#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_create (i, j)
+  {
+    i = 2;
+    j = 1;
+    if (i != 2 || j != 1)
+      abort ();
+    v = 1;
+  }
+  expect (v, j, UNCOPIED_I, UNCOPIED_J);
+
+  /* present, inside a data region that copied I and J in.  */
+  v = 0;
+  i = -1;
+  j = -2;
+#pragma acc data copyin (i, j)
+  {
+#pragma acc parallel /* copyout */ present_or_copyout (v) present (i, j)
+    {
+      if (i != -1 || j != -2)
+        abort ();
+      i = 2;
+      j = 1;
+      if (i != 2 || j != 1)
+        abort ();
+      v = 1;
+    }
+  }
+  expect (v, j, UNCOPIED_I, UNCOPIED_J);
+
+  /* No clause for I and J; the enclosing data region copied them in.  */
+  v = 0;
+  i = -1;
+  j = -2;
+#pragma acc data copyin(i, j)
+  {
+#pragma acc parallel /* copyout */ present_or_copyout (v)
+    {
+      if (i != -1 || j != -2)
+        abort ();
+      i = 2;
+      j = 1;
+      if (i != 2 || j != 1)
+        abort ();
+      v = 1;
+    }
+  }
+  expect (v, j, UNCOPIED_I, UNCOPIED_J);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-empty.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-empty.c
new file mode 100644
index 0000000..8e3bb43
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-empty.c
@@ -0,0 +1,7 @@
+/* A parallel construct whose body is only the null statement.  */
+
+int main (void)
+{
+#pragma acc parallel
+  ;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pointer-align-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pointer-align-1.c
new file mode 100644
index 0000000..f7d5b9b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pointer-align-1.c
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+
+/* PR middle-end/63247 */
+
+/* The copied section starts at a[1] rather than at the start of the array;
+   a[0] lies outside it and must keep its host value.  */
+
+#include <stdlib.h>
+
+#define N 4
+
+int
+main (int argc, char **argv)
+{
+  short a[N] = { 10, 10, 10, 10 };
+
+  /* Write every element of the section on the device.  */
+#pragma acc parallel copy(a[1:N-1])
+  {
+    a[1] = 51;
+    a[2] = 52;
+    a[3] = 53;
+  }
+
+  /* a[0] untouched, a[1..3] updated.  */
+  if (a[0] != 10 || a[1] != 51 || a[2] != 52 || a[3] != 53)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/present-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/present-1.c
new file mode 100644
index 0000000..f331f1f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/present-1.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+/* Only C is mapped (with acc_map_data) before a data region that names A, B
+   and C in present clauses.  The run is expected to fail, see the
+   dg-shouldfail directive at the end of the file.  */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+int
+main (int argc, char **argv)
+{
+  int N = 8;
+  size_t bytes = N * sizeof (float);
+  float *a = (float *) malloc (bytes);
+  float *b = (float *) malloc (bytes);
+  float *c = (float *) malloc (bytes);
+  float *d = (float *) acc_malloc (bytes);
+
+  /* Only C gets a device mapping.  */
+  acc_map_data (c, d, bytes);
+
+#pragma acc data present (a[0:N]) present (c[0:N]) present (b[0:N])
+  {
+#pragma acc parallel
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        {
+          c[k] = a[k];
+          b[k] = c[k];
+        }
+    }
+  }
+
+  d = (float *) acc_deviceptr (c);
+  acc_unmap_data (c);
+  acc_free (d);
+
+  free (a);
+  free (b);
+  free (c);
+
+  return 0;
+}
+/* { dg-shouldfail "libgomp: present clause: !acc_is_present" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/present-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/present-2.c
new file mode 100644
index 0000000..41efa70
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/present-2.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+/* A present clause on a parallel construct for an array that the enclosing
+   data region has already copied in.  */
+
+#include <openacc.h>
+#include <stdlib.h>
+
+int
+main (int argc, char **argv)
+{
+  int N = 8;
+  float *a, *b;
+  int i;
+
+  a = (float *) malloc (N * sizeof (float));
+  b = (float *) malloc (N * sizeof (float));
+
+  i = 0;
+  while (i < N)
+    {
+      a[i] = 4.0;
+      b[i] = 0.0;
+      i++;
+    }
+
+#pragma acc data copyin(a[0:N]) copyout(b[0:N])
+  {
+#pragma acc parallel present(a[0:N])
+    {
+      int k;
+
+      for (k = 0; k < N; k++)
+        b[k] = a[k];
+    }
+  }
+
+  for (i = 0; i < N; i++)
+    if (!(a[i] == 4.0 && b[i] == 4.0))
+      abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c
new file mode 100644
index 0000000..acf9540
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c
@@ -0,0 +1,145 @@
+/* { dg-do run } */
+
+/* Integer reductions.  Each reduction runs in an OpenACC loop and is then
+   recomputed sequentially on the host; the two results must agree.  */
+
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define vl 32
+
+int
+main (void)
+{
+  const int n = 1000;
+  int i;
+  int vresult, result, array[n];
+  bool lvresult, lresult;
+
+  for (i = 0; i < n; i++)
+    array[i] = i;
+
+  result = 0;
+  vresult = 0;
+
+  /* '+' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result += array[i];
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult += array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '*' reductions.  NOTE(review): both accumulators start at 0, so this
+     only checks that the product stays 0.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (*:result)
+  for (i = 0; i < n; i++)
+    result *= array[i];
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult *= array[i];
+
+  if (result != vresult)
+    abort ();
+
+  /* TODO: 'max' and 'min' reductions are not exercised yet.  */
+
+  result = 0;
+  vresult = 0;
+
+  /* '&' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (&:result)
+  for (i = 0; i < n; i++)
+    result &= array[i];
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult &= array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '|' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (|:result)
+  for (i = 0; i < n; i++)
+    result |= array[i];
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult |= array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '^' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (^:result)
+  for (i = 0; i < n; i++)
+    result ^= array[i];
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult ^= array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '&&' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (&&:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult && (result > array[i]);
+
+  /* Verify the reduction.  The reference is accumulated in LVRESULT alone,
+     independently of the value being checked.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult && (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '||' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (||:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult || (result > array[i]);
+
+  /* Verify the reduction, again independently of LRESULT.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult || (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c
new file mode 100644
index 0000000..c2ec110
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c
@@ -0,0 +1,98 @@
+/* { dg-do run } */
+
+/* float reductions.  Each reduction runs in an OpenACC loop and is then
+   recomputed sequentially on the host; the two results must agree.  */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <math.h>
+
+#define vl 32
+
+int
+main (void)
+{
+  const int n = 1000;
+  int i;
+  float vresult, result, array[n];
+  bool lvresult, lresult;
+
+  for (i = 0; i < n; i++)
+    array[i] = i;
+
+  result = 0;
+  vresult = 0;
+
+  /* '+' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result += array[i];
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult += array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '*' reductions.  NOTE(review): both accumulators start at 0, so this
+     only checks that the product stays 0.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (*:result)
+  for (i = 0; i < n; i++)
+    result *= array[i];
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult *= array[i];
+
+  if (fabs(result - vresult) > .0001)
+    abort ();
+
+  /* TODO: 'max' and 'min' reductions are not exercised yet.  */
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '&&' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (&&:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult && (result > array[i]);
+
+  /* Verify the reduction.  The reference is accumulated in LVRESULT alone,
+     independently of the value being checked.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult && (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '||' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (||:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult || (result > array[i]);
+
+  /* Verify the reduction, again independently of LRESULT.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult || (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c
new file mode 100644
index 0000000..58b49ff
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c
@@ -0,0 +1,98 @@
+/* { dg-do run } */
+
+/* double reductions.  Each reduction runs in an OpenACC loop and is then
+   recomputed sequentially on the host; the two results must agree.  */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <math.h>
+
+#define vl 32
+
+int
+main (void)
+{
+  const int n = 1000;
+  int i;
+  double vresult, result, array[n];
+  bool lvresult, lresult;
+
+  for (i = 0; i < n; i++)
+    array[i] = i;
+
+  result = 0;
+  vresult = 0;
+
+  /* '+' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result += array[i];
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult += array[i];
+
+  if (result != vresult)
+    abort ();
+
+  result = 0;
+  vresult = 0;
+
+  /* '*' reductions.  NOTE(review): both accumulators start at 0, so this
+     only checks that the product stays 0.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (*:result)
+  for (i = 0; i < n; i++)
+    result *= array[i];
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult *= array[i];
+
+  if (fabs(result - vresult) > .0001)
+    abort ();
+
+  /* TODO: 'max' and 'min' reductions are not exercised yet.  */
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '&&' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (&&:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult && (result > array[i]);
+
+  /* Verify the reduction.  The reference is accumulated in LVRESULT alone,
+     independently of the value being checked.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult && (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '||' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (||:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult || (result > array[i]);
+
+  /* Verify the reduction, again independently of LRESULT.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult || (result > array[i]);
+
+  if (lresult != lvresult)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c
new file mode 100644
index 0000000..c8a9a6c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c
@@ -0,0 +1,83 @@
+/* { dg-do run } */
+
+/* complex reductions.  Each reduction runs in an OpenACC loop and is then
+   recomputed sequentially on the host; the two results must agree.  */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <math.h>
+#include <complex.h>
+
+#define vl 32
+
+int
+main (void)
+{
+  const int n = 1000;
+  int i;
+  double complex vresult, result, array[n];
+  bool lvresult, lresult;
+
+  for (i = 0; i < n; i++)
+    array[i] = i;
+
+  result = 0;
+  vresult = 0;
+
+  /* '+' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result += array[i];
+
+  /* Verify the reduction.  */
+  for (i = 0; i < n; i++)
+    vresult += array[i];
+
+  if (result != vresult)
+    abort ();
+
+  /* TODO: '*' reductions need support for complex multiplication; 'max'
+     and 'min' reductions are not exercised yet either.  */
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '&&' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (&&:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult && (creal(result) > creal(array[i]));
+
+  /* Verify the reduction.  The reference is accumulated in LVRESULT alone,
+     independently of the value being checked.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult && (creal(result) > creal(array[i]));
+
+  if (lresult != lvresult)
+    abort ();
+
+  result = 5;
+  vresult = 5;
+
+  lresult = false;
+  lvresult = false;
+
+  /* '||' reductions.  */
+#pragma acc parallel vector_length (vl)
+#pragma acc loop reduction (||:lresult)
+  for (i = 0; i < n; i++)
+    lresult = lresult || (creal(result) > creal(array[i]));
+
+  /* Verify the reduction, again independently of LRESULT.  */
+  for (i = 0; i < n; i++)
+    lvresult = lvresult || (creal(result) > creal(array[i]));
+
+  if (lresult != lvresult)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c
new file mode 100644
index 0000000..757b8be
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c
@@ -0,0 +1,32 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Check that one reduction clause naming two variables (s1 and s2)
+   yields the same sums as an equivalent loop outside any OpenACC
+   region.  */
+int
+main (void)
+{
+  int n = 100;
+  int s1 = 2, s2 = 5;   /* Accumulated inside the OpenACC region.  */
+  int v1 = 2, v2 = 5;   /* Accumulated by the reference loop.  */
+  int i;
+
+#pragma acc parallel vector_length (1000)
+#pragma acc loop reduction (+:s1, s2)
+  for (i = 0; i < n; i++)
+    {
+      s1 += 3;
+      s2 += 2;
+    }
+
+  /* Reference computation.  */
+  for (i = 0; i < n; i++)
+    {
+      v1 += 3;
+      v2 += 2;
+    }
+
+  if (s1 != v1 || s2 != v2)
+    abort ();
+
+  return 0;
+} \ No newline at end of file
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-initial-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-initial-1.c
new file mode 100644
index 0000000..81cf865
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-initial-1.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+
+/* Check that a '+' reduction keeps the value the variable had on entry
+   (I): N additions of A must produce I + N * A.  */
+int
+main(void)
+{
+#define I 5
+#define N 11
+#define A 8
+
+  int s = I;
+  int a = A;
+  const int expected = I + N * A;
+
+#pragma acc parallel vector_length(N)
+  {
+    int i;
+#pragma acc loop reduction(+:s)
+    for (i = 0; i < N; i++)
+      s = s + a;
+  }
+
+  if (s == expected)
+    return 0;
+
+  __builtin_abort();
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h
new file mode 100644
index 0000000..9db236c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h
@@ -0,0 +1,46 @@
+
+#if ACC_DEVICE_TYPE_nvidia
+
+/* Return the current value of the PTX %clock register. This definition
+ is only compiled when ACC_DEVICE_TYPE_nvidia is set (see the enclosing
+ #if). */
+#pragma acc routine nohost
+static int clock (void)
+{
+ int thetime;
+
+ /* "=r" makes thetime the destination operand of the mov.u32. */
+ asm __volatile__ ("mov.u32 %0, %%clock;" : "=r"(thetime));
+
+ return thetime;
+}
+
+#endif
+
+/* Busy-wait until the tick count returned by clock () has advanced by
+   at least DELAY since entry.  D_O is accepted but not used.  */
+void
+delay (unsigned long *d_o, unsigned long delay)
+{
+  int t0, elapsed;
+
+  t0 = clock ();
+
+  /* ELAPSED is an int; the comparison against the unsigned long DELAY
+     converts it exactly as a plain 'elapsed < delay' test would.  */
+  for (elapsed = 0; elapsed < delay; elapsed = clock () - t0)
+    continue;
+}
+
+/* Busy-wait until the tick count returned by clock () has advanced by
+   at least DELAY since entry, then store TID in the first element of
+   D_O.  */
+void
+delay2 (unsigned long *d_o, unsigned long delay, unsigned long tid)
+{
+  int t0, elapsed;
+
+  t0 = clock ();
+
+  /* ELAPSED is an int; the comparison against the unsigned long DELAY
+     converts it exactly as a plain 'elapsed < delay' test would.  */
+  for (elapsed = 0; elapsed < delay; elapsed = clock () - t0)
+    continue;
+
+  *d_o = tid;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx
new file mode 100644
index 0000000..6f748fc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx
@@ -0,0 +1,148 @@
+// BEGIN PREAMBLE
+ .version 3.1
+ .target sm_30
+ .address_size 64
+// END PREAMBLE
+
+// BEGIN FUNCTION DEF: clock
+// Returns the 32-bit value of %clock through %out_retval.
+.func (.param.u32 %out_retval)clock
+{
+.reg.u32 %retval;
+ .reg.u64 %hr10;
+ .reg.u32 %r22;
+ .reg.u32 %r23;
+ .reg.u32 %r24;
+ .local.align 8 .b8 %frame[8];
+ // #APP
+// 7 "subr.c" 1
+ mov.u32 %r24, %clock;
+// 0 "" 2
+ // #NO_APP
+ // The value is spilled to the local frame and reloaded before being
+ // copied into the return slot.
+ st.local.u32 [%frame], %r24;
+ ld.local.u32 %r22, [%frame];
+ mov.u32 %r23, %r22;
+ mov.u32 %retval, %r23;
+ st.param.u32 [%out_retval], %retval;
+ ret;
+ }
+// END FUNCTION DEF
+// BEGIN GLOBAL FUNCTION DEF: delay
+// Kernel entry: spins, calling clock, until the elapsed tick count is no
+// longer below the second parameter (%in_ar2).  The first parameter is
+// only saved to the frame and never read back.
+// Frame layout: [%frame] elapsed ticks, [%frame+4] start tick,
+// [%frame+8] first parameter, [%frame+16] second parameter.
+.visible .entry delay(.param.u64 %in_ar1, .param.u64 %in_ar2)
+{
+ .reg.u64 %ar1;
+ .reg.u64 %ar2;
+ .reg.u64 %hr10;
+ .reg.u64 %r22;
+ .reg.u32 %r23;
+ .reg.u64 %r24;
+ .reg.u64 %r25;
+ .reg.u32 %r26;
+ .reg.u32 %r27;
+ .reg.u32 %r28;
+ .reg.u32 %r29;
+ .reg.u32 %r30;
+ .reg.u64 %r31;
+ .reg.pred %r32;
+ .local.align 8 .b8 %frame[24];
+ ld.param.u64 %ar1, [%in_ar1];
+ ld.param.u64 %ar2, [%in_ar2];
+ mov.u64 %r24, %ar1;
+ // %frame is declared .local, so the store must name the .local state
+ // space like every other frame access here (and like the matching
+ // store in delay2); a plain st.u64 would use generic addressing.
+ st.local.u64 [%frame+8], %r24;
+ mov.u64 %r25, %ar2;
+ st.local.u64 [%frame+16], %r25;
+ // start = clock ()
+ {
+ .param.u32 %retval_in;
+ {
+ call (%retval_in), clock;
+ }
+ ld.param.u32 %r26, [%retval_in];
+}
+ st.local.u32 [%frame+4], %r26;
+ // ticks = 0
+ mov.u32 %r27, 0;
+ st.local.u32 [%frame], %r27;
+ bra $L4;
+$L5:
+ // ticks = clock () - start
+ {
+ .param.u32 %retval_in;
+ {
+ call (%retval_in), clock;
+ }
+ ld.param.u32 %r28, [%retval_in];
+}
+ mov.u32 %r23, %r28;
+ ld.local.u32 %r30, [%frame+4];
+ sub.u32 %r29, %r23, %r30;
+ st.local.u32 [%frame], %r29;
+$L4:
+ // Loop while ticks, loaded as s32 into a 64-bit register, is below
+ // the saved second parameter in an unsigned 64-bit comparison.
+ ld.local.s32 %r22, [%frame];
+ ld.local.u64 %r31, [%frame+16];
+ setp.lo.u64 %r32,%r22,%r31;
+ @%r32 bra $L5;
+ ret;
+ }
+// END FUNCTION DEF
+// BEGIN GLOBAL FUNCTION DEF: delay2
+// Kernel entry: spins, calling clock, until the elapsed tick count is no
+// longer below the second parameter, then stores the third parameter
+// through the address passed as the first parameter.
+// Frame layout: [%frame] elapsed ticks, [%frame+4] start tick,
+// [%frame+8], [%frame+16], [%frame+24] the three parameters in order.
+.visible .entry delay2(.param.u64 %in_ar1, .param.u64 %in_ar2, .param.u64 %in_ar3)
+{
+ .reg.u64 %ar1;
+ .reg.u64 %ar2;
+ .reg.u64 %ar3;
+ .reg.u64 %hr10;
+ .reg.u64 %r22;
+ .reg.u32 %r23;
+ .reg.u64 %r24;
+ .reg.u64 %r25;
+ .reg.u64 %r26;
+ .reg.u32 %r27;
+ .reg.u32 %r28;
+ .reg.u32 %r29;
+ .reg.u32 %r30;
+ .reg.u32 %r31;
+ .reg.u64 %r32;
+ .reg.pred %r33;
+ .reg.u64 %r34;
+ .reg.u64 %r35;
+ .local.align 8 .b8 %frame[32];
+ ld.param.u64 %ar1, [%in_ar1];
+ ld.param.u64 %ar2, [%in_ar2];
+ ld.param.u64 %ar3, [%in_ar3];
+ mov.u64 %r24, %ar1;
+ st.local.u64 [%frame+8], %r24;
+ mov.u64 %r25, %ar2;
+ st.local.u64 [%frame+16], %r25;
+ mov.u64 %r26, %ar3;
+ st.local.u64 [%frame+24], %r26;
+ // start = clock ()
+ {
+ .param.u32 %retval_in;
+ {
+ call (%retval_in), clock;
+ }
+ ld.param.u32 %r27, [%retval_in];
+}
+ st.local.u32 [%frame+4], %r27;
+ // ticks = 0
+ mov.u32 %r28, 0;
+ st.local.u32 [%frame], %r28;
+ bra $L8;
+$L9:
+ // ticks = clock () - start
+ {
+ .param.u32 %retval_in;
+ {
+ call (%retval_in), clock;
+ }
+ ld.param.u32 %r29, [%retval_in];
+}
+ mov.u32 %r23, %r29;
+ ld.local.u32 %r31, [%frame+4];
+ sub.u32 %r30, %r23, %r31;
+ st.local.u32 [%frame], %r30;
+$L8:
+ // Loop while ticks, loaded as s32 into a 64-bit register, is below
+ // the saved second parameter in an unsigned 64-bit comparison.
+ ld.local.s32 %r22, [%frame];
+ ld.local.u64 %r32, [%frame+16];
+ setp.lo.u64 %r33,%r22,%r32;
+ @%r33 bra $L9;
+ // Store the saved third parameter through the saved first parameter.
+ ld.local.u64 %r34, [%frame+8];
+ ld.local.u64 %r35, [%frame+24];
+ st.u64 [%r34], %r35;
+ ret;
+ }
+// END FUNCTION DEF
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h
new file mode 100644
index 0000000..53749da
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h
@@ -0,0 +1,103 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+
+static int _Tnum_timers;
+static CUevent *_Tstart_events, *_Tstop_events;
+static CUstream _Tstream;
+
+/* Allocate NTIMERS start/stop event pairs and create the stream they
+   are recorded on.  Any allocation or CUDA failure is reported on
+   stderr and aborts.  */
+void
+init_timers (int ntimers)
+{
+  int i;
+  CUresult r;
+
+  _Tnum_timers = ntimers;
+
+  _Tstart_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent));
+  _Tstop_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent));
+
+  /* malloc (0) may legitimately return NULL, so only a non-empty
+     request counts as a failure.  Without this check cuEventCreate
+     below would write through a null pointer.  */
+  if (_Tnum_timers > 0
+      && (_Tstart_events == NULL || _Tstop_events == NULL))
+    {
+      fprintf (stderr, "malloc failed\n");
+      abort ();
+    }
+
+  r = cuStreamCreate (&_Tstream, CU_STREAM_DEFAULT);
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+      abort ();
+    }
+
+  for (i = 0; i < _Tnum_timers; i++)
+    {
+      r = cuEventCreate (&_Tstart_events[i], CU_EVENT_DEFAULT);
+      if (r != CUDA_SUCCESS)
+        {
+          fprintf (stderr, "cuEventCreate failed: %d\n", r);
+          abort ();
+        }
+
+      r = cuEventCreate (&_Tstop_events[i], CU_EVENT_DEFAULT);
+      if (r != CUDA_SUCCESS)
+        {
+          fprintf (stderr, "cuEventCreate failed: %d\n", r);
+          abort ();
+        }
+    }
+}
+
+/* Destroy every start/stop event pair and the timer stream, then free
+   the two event arrays.  Return codes of the CUDA calls are ignored.  */
+void
+fini_timers (void)
+{
+  int i = 0;
+
+  while (i < _Tnum_timers)
+    {
+      cuEventDestroy (_Tstart_events[i]);
+      cuEventDestroy (_Tstop_events[i]);
+      i++;
+    }
+
+  cuStreamDestroy (_Tstream);
+
+  free (_Tstart_events);
+  free (_Tstop_events);
+}
+
+/* Record the start event of timer number TIMER on the timer stream.
+   A CUDA failure is reported on stderr and aborts.  */
+void
+start_timer (int timer)
+{
+  CUresult r = cuEventRecord (_Tstart_events[timer], _Tstream);
+
+  if (r == CUDA_SUCCESS)
+    return;
+
+  fprintf (stderr, "cuEventRecord failed: %d\n", r);
+  abort ();
+}
+
+/* Record the stop event of timer number TIMER, wait for it to complete
+   and return what cuEventElapsedTime reports between that timer's start
+   and stop events.  Any CUDA failure is reported on stderr and aborts.  */
+float
+stop_timer (int timer)
+{
+  float etime;
+  CUresult r = cuEventRecord (_Tstop_events[timer], _Tstream);
+
+  if (r != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuEventRecord failed: %d\n", r);
+      abort ();
+    }
+
+  if ((r = cuEventSynchronize (_Tstop_events[timer])) != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuEventSynchronize failed: %d\n", r);
+      abort ();
+    }
+
+  if ((r = cuEventElapsedTime (&etime, _Tstart_events[timer],
+                               _Tstop_events[timer])) != CUDA_SUCCESS)
+    {
+      fprintf (stderr, "cuEventElapsedTime failed: %d\n", r);
+      abort ();
+    }
+
+  return etime;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/update-1-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/update-1-2.c
new file mode 100644
index 0000000..c7e7257
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/update-1-2.c
@@ -0,0 +1,282 @@
+/* Copy of update-1.c with self exchanged with host for #pragma acc update. */
+
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+/* Exercise '#pragma acc update' on host arrays that were associated
+ with acc_malloc'ed memory through acc_map_data. Each step sets host
+ values, pushes them with 'update device', runs a parallel region that
+ copies a[] into b[], pulls both arrays back and aborts on any
+ unexpected value. argc and argv are not used. */
+int
+main (int argc, char **argv)
+{
+ int N = 8;
+ float *a, *b, *c;
+ float *d_a, *d_b, *d_c;
+ int i;
+
+ a = (float *) malloc (N * sizeof (float));
+ b = (float *) malloc (N * sizeof (float));
+ c = (float *) malloc (N * sizeof (float));
+
+ d_a = (float *) acc_malloc (N * sizeof (float));
+ d_b = (float *) acc_malloc (N * sizeof (float));
+ d_c = (float *) acc_malloc (N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ }
+
+ /* c/d_c are mapped as well but never referenced again below. */
+ acc_map_data (a, d_a, N * sizeof (float));
+ acc_map_data (b, d_b, N * sizeof (float));
+ acc_map_data (c, d_c, N * sizeof (float));
+
+ /* Step 1: a = 3.0 everywhere; b must come back as 3.0. */
+#pragma acc update device (a[0:N], b[0:N])
+
+#pragma acc parallel present (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update self (a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 3.0)
+ abort ();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ /* Step 2: same round trip with a = 5.0. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 1.0;
+ }
+
+#pragma acc update device (a[0:N], b[0:N])
+
+#pragma acc parallel present (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update self (a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 5.0)
+ abort ();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ /* Step 3: same values as step 2, but the arrays are pulled back with
+ the 'host' clause instead of 'self'. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 1.0;
+ }
+
+#pragma acc update device (a[0:N], b[0:N])
+
+#pragma acc parallel present (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update host (a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 5.0)
+ abort ();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ /* Step 4: push a = 6.0, then overwrite only the host copy with 9.0. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc update device (a[0:N], b[0:N])
+
+ /* No 'update device' follows this store; the checks below expect the
+ 6.0 pushed above to come back for both arrays. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 9.0;
+ }
+
+#pragma acc parallel present (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update self (a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 6.0)
+ abort ();
+
+ if (b[i] != 6.0)
+ abort ();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ /* Step 5: as step 4 with 7.0 pushed and 9.0 written on the host only;
+ this step has no acc_is_present checks. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 7.0;
+ b[i] = 2.0;
+ }
+
+#pragma acc update device (a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 9.0;
+ }
+
+#pragma acc parallel present (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update self (a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 7.0)
+ abort ();
+
+ if (b[i] != 7.0)
+ abort ();
+ }
+
+ /* Step 6: only a[] is named in 'update device'; b must still come
+ back as 9.0 through the copy in the parallel region. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 9.0;
+ }
+
+#pragma acc update device (a[0:N])
+
+#pragma acc parallel present (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update self (a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 9.0)
+ abort ();
+
+ if (b[i] != 9.0)
+ abort ();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ /* Step 7: partial update. All of a[] is pushed as 5.0, then only
+ a[0:N >> 1] is pushed again as 6.0. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ }
+
+#pragma acc update device (a[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ }
+
+#pragma acc update device (a[0:N >> 1])
+
+#pragma acc parallel present (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update self (a[0:N], b[0:N])
+
+ /* First half must hold the re-pushed 6.0 ... */
+ for (i = 0; i < (N >> 1); i++)
+ {
+ if (a[i] != 6.0)
+ abort ();
+
+ if (b[i] != 6.0)
+ abort ();
+ }
+
+ /* ... and the second half the 5.0 from the full push. */
+ for (i = (N >> 1); i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 5.0)
+ abort ();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/update-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/update-1.c
new file mode 100644
index 0000000..dff139f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/update-1.c
@@ -0,0 +1,280 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
+
+#include <openacc.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+/* Exercise '#pragma acc update' on host arrays that were associated
+ with acc_malloc'ed memory through acc_map_data. Each step sets host
+ values, pushes them with 'update device', runs a parallel region that
+ copies a[] into b[], pulls both arrays back and aborts on any
+ unexpected value. argc and argv are not used. */
+int
+main (int argc, char **argv)
+{
+ int N = 8;
+ float *a, *b, *c;
+ float *d_a, *d_b, *d_c;
+ int i;
+
+ a = (float *) malloc (N * sizeof (float));
+ b = (float *) malloc (N * sizeof (float));
+ c = (float *) malloc (N * sizeof (float));
+
+ d_a = (float *) acc_malloc (N * sizeof (float));
+ d_b = (float *) acc_malloc (N * sizeof (float));
+ d_c = (float *) acc_malloc (N * sizeof (float));
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ }
+
+ /* c/d_c are mapped as well but never referenced again below. */
+ acc_map_data (a, d_a, N * sizeof (float));
+ acc_map_data (b, d_b, N * sizeof (float));
+ acc_map_data (c, d_c, N * sizeof (float));
+
+ /* Step 1: a = 3.0 everywhere; b must come back as 3.0. */
+#pragma acc update device (a[0:N], b[0:N])
+
+#pragma acc parallel present (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update host (a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 3.0)
+ abort ();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ /* Step 2: same round trip with a = 5.0. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 1.0;
+ }
+
+#pragma acc update device (a[0:N], b[0:N])
+
+#pragma acc parallel present (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update host (a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 5.0)
+ abort ();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ /* Step 3: same values as step 2, but the arrays are pulled back with
+ the 'self' clause instead of 'host'. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 1.0;
+ }
+
+#pragma acc update device (a[0:N], b[0:N])
+
+#pragma acc parallel present (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update self (a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 5.0)
+ abort ();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ /* Step 4: push a = 6.0, then overwrite only the host copy with 9.0. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc update device (a[0:N], b[0:N])
+
+ /* No 'update device' follows this store; the checks below expect the
+ 6.0 pushed above to come back for both arrays. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 9.0;
+ }
+
+#pragma acc parallel present (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update host (a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 6.0)
+ abort ();
+
+ if (b[i] != 6.0)
+ abort ();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ /* Step 5: as step 4 with 7.0 pushed and 9.0 written on the host only;
+ this step has no acc_is_present checks. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 7.0;
+ b[i] = 2.0;
+ }
+
+#pragma acc update device (a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 9.0;
+ }
+
+#pragma acc parallel present (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update host (a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 7.0)
+ abort ();
+
+ if (b[i] != 7.0)
+ abort ();
+ }
+
+ /* Step 6: only a[] is named in 'update device'; b must still come
+ back as 9.0 through the copy in the parallel region. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 9.0;
+ }
+
+#pragma acc update device (a[0:N])
+
+#pragma acc parallel present (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update host (a[0:N], b[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 9.0)
+ abort ();
+
+ if (b[i] != 9.0)
+ abort ();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ /* Step 7: partial update. All of a[] is pushed as 5.0, then only
+ a[0:N >> 1] is pushed again as 6.0. */
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ }
+
+#pragma acc update device (a[0:N])
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 6.0;
+ }
+
+#pragma acc update device (a[0:N >> 1])
+
+#pragma acc parallel present (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc update host (a[0:N], b[0:N])
+
+ /* First half must hold the re-pushed 6.0 ... */
+ for (i = 0; i < (N >> 1); i++)
+ {
+ if (a[i] != 6.0)
+ abort ();
+
+ if (b[i] != 6.0)
+ abort ();
+ }
+
+ /* ... and the second half the 5.0 from the full push. */
+ for (i = (N >> 1); i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 5.0)
+ abort ();
+ }
+
+ if (!acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (!acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/c.exp b/libgomp/testsuite/libgomp.oacc-c/c.exp
new file mode 100644
index 0000000..c0c70bb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/c.exp
@@ -0,0 +1,71 @@
+# This whole file adapted from libgomp.c/c.exp.
+
+# Unset the lang_* variables if they are currently set.
+if [info exists lang_library_path] then {
+ unset lang_library_path
+ unset lang_link_flags
+}
+if [info exists lang_test_file] then {
+ unset lang_test_file
+}
+if [info exists lang_include_flags] then {
+ unset lang_include_flags
+}
+
+load_lib libgomp-dg.exp
+load_gcc_lib gcc-dg.exp
+
+# If a testcase doesn't have special options, use these.
+if ![info exists DEFAULT_CFLAGS] then {
+ set DEFAULT_CFLAGS "-O2"
+}
+
+# Initialize dg.
+dg-init
+
+# Turn on OpenACC.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc"
+
+# Gather a list of all tests.
+# The list combines this directory with the shared
+# libgomp.oacc-c-c++-common directory.
+set tests [lsort [concat \
+ [find $srcdir/$subdir *.c] \
+ [find $srcdir/$subdir/../libgomp.oacc-c-c++-common *.c]]]
+
+set ld_library_path $always_ld_library_path
+append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST]
+set_ld_library_path_env_vars
+
+# Test OpenACC with available accelerators.
+# ALWAYS_CFLAGS is restored at the top of every iteration because the
+# nvidia arm appends to it.
+set SAVE_ALWAYS_CFLAGS "$ALWAYS_CFLAGS"
+foreach offload_target_openacc $offload_targets_s_openacc {
+ set ALWAYS_CFLAGS "$SAVE_ALWAYS_CFLAGS"
+ set tagopt "-DACC_DEVICE_TYPE_$offload_target_openacc=1"
+
+ # Pick the ACC_MEM_SHARED value for this target: 1 for host, 0 for
+ # everything else.
+ switch $offload_target_openacc {
+ host {
+ set acc_mem_shared 1
+ }
+ host_nonshm {
+ set acc_mem_shared 0
+ }
+ nvidia {
+ # Copy ptx file (TEMPORARY)
+ remote_download host $srcdir/libgomp.oacc-c-c++-common/subr.ptx
+
+ # Where timer.h lives
+ lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/libgomp.oacc-c-c++-common"
+
+ set acc_mem_shared 0
+ }
+ default {
+ set acc_mem_shared 0
+ }
+ }
+ set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared"
+
+ # The device type is also exported through the environment.
+ setenv ACC_DEVICE_TYPE $offload_target_openacc
+
+ dg-runtest $tests "$tagopt" $DEFAULT_CFLAGS
+}
+
+# All done.
+dg-finish
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90
new file mode 100644
index 0000000..52b030b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90
@@ -0,0 +1,10 @@
+! { dg-shouldfail "" { *-*-* } { "*" } { "" } }
+
+! Call abort unconditionally from inside an OpenACC parallel region.
+program main
+  implicit none
+
+  !$acc parallel
+  call abort ()
+  !$acc end parallel
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90
new file mode 100644
index 0000000..2ba2bcb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90
@@ -0,0 +1,13 @@
+! Call abort from inside an OpenACC parallel region, but only when the
+! program was started with command-line arguments.
+program main
+  implicit none
+
+  integer :: argc
+
+  argc = command_argument_count ()
+
+  !$acc parallel copyin(argc)
+  if (argc /= 0) call abort ()
+  !$acc end parallel
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90
new file mode 100644
index 0000000..4488818
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90
@@ -0,0 +1,52 @@
+! { dg-additional-options "-cpp" }
+! TODO: Do we have to disable the acc_on_device builtin, given that we want
+! to test the libgomp library function?  The command-line option
+! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not for
+! Fortran.
+
+! Check acc_on_device against each device-type constant in three
+! contexts: plain host code, a parallel region with if(.false.), and
+! (unless ACC_DEVICE_TYPE_host is set) a parallel region without an if
+! clause.  Any unexpected answer calls abort.
+use openacc
+implicit none
+
+! Host.
+
+if (.not. acc_on_device (acc_device_none)) call abort
+if (.not. acc_on_device (acc_device_host)) call abort
+if (acc_on_device (acc_device_host_nonshm)) call abort
+if (acc_on_device (acc_device_not_host)) call abort
+if (acc_on_device (acc_device_nvidia)) call abort
+
+
+! Host via offloading fallback mode.
+! The expected answers are the same as in the plain host checks above.
+
+!$acc parallel if(.false.)
+if (.not. acc_on_device (acc_device_none)) call abort
+if (.not. acc_on_device (acc_device_host)) call abort
+if (acc_on_device (acc_device_host_nonshm)) call abort
+if (acc_on_device (acc_device_not_host)) call abort
+if (acc_on_device (acc_device_nvidia)) call abort
+!$acc end parallel
+
+
+#if !ACC_DEVICE_TYPE_host
+
+! Offloaded.
+! The host_nonshm and nvidia answers depend on which ACC_DEVICE_TYPE_*
+! macro is set.
+
+!$acc parallel
+if (acc_on_device (acc_device_none)) call abort
+if (acc_on_device (acc_device_host)) call abort
+#if ACC_DEVICE_TYPE_host_nonshm
+if (.not. acc_on_device (acc_device_host_nonshm)) call abort
+#else
+if (acc_on_device (acc_device_host_nonshm)) call abort
+#endif
+if (.not. acc_on_device (acc_device_not_host)) call abort
+#if ACC_DEVICE_TYPE_nvidia
+if (.not. acc_on_device (acc_device_nvidia)) call abort
+#else
+if (acc_on_device (acc_device_nvidia)) call abort
+#endif
+!$acc end parallel
+
+#endif
+
+end
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f
new file mode 100644
index 0000000..0047a19
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f
@@ -0,0 +1,52 @@
+! { dg-additional-options "-cpp" }
+! TODO: Do we have to disable the acc_on_device builtin, given that
+! we want to test the libgomp library function?  The command-line
+! option '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++
+! but not for Fortran.
+
+! Check ACC_ON_DEVICE against each device-type constant in three
+! contexts: plain host code, a parallel region with IF(.FALSE.), and
+! (unless ACC_DEVICE_TYPE_host is set) a parallel region without an
+! IF clause.  Any unexpected answer calls ABORT.
+ USE OPENACC
+ IMPLICIT NONE
+
+!Host.
+
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+
+
+!Host via offloading fallback mode.
+! The expected answers are the same as in the plain host checks above.
+
+!$ACC PARALLEL IF(.FALSE.)
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+!$ACC END PARALLEL
+
+
+#if !ACC_DEVICE_TYPE_host
+
+! Offloaded.
+! The HOST_NONSHM and NVIDIA answers depend on which
+! ACC_DEVICE_TYPE_* macro is set.
+
+!$ACC PARALLEL
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+#if ACC_DEVICE_TYPE_host_nonshm
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
+#else
+ IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
+#endif
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+#if ACC_DEVICE_TYPE_nvidia
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#else
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#endif
+!$ACC END PARALLEL
+
+#endif
+
+ END
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f
new file mode 100644
index 0000000..49d7a72
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f
@@ -0,0 +1,52 @@
+! { dg-additional-options "-cpp" }
+! TODO: Do we have to disable the acc_on_device builtin, given that
+! we want to test the libgomp library function?  The command-line
+! option '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++
+! but not for Fortran.
+
+! Check ACC_ON_DEVICE, declared through the openacc_lib.h include
+! file, against each device-type constant in three contexts: plain
+! host code, a parallel region with IF(.FALSE.), and (unless
+! ACC_DEVICE_TYPE_host is set) a parallel region without an IF
+! clause.  Any unexpected answer calls ABORT.
+ IMPLICIT NONE
+ INCLUDE "openacc_lib.h"
+
+!Host.
+
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+
+
+!Host via offloading fallback mode.
+! The expected answers are the same as in the plain host checks above.
+
+!$ACC PARALLEL IF(.FALSE.)
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+!$ACC END PARALLEL
+
+
+#if !ACC_DEVICE_TYPE_host
+
+! Offloaded.
+! The HOST_NONSHM and NVIDIA answers depend on which
+! ACC_DEVICE_TYPE_* macro is set.
+
+!$ACC PARALLEL
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT
+ IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT
+#if ACC_DEVICE_TYPE_host_nonshm
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
+#else
+ IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT
+#endif
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT
+#if ACC_DEVICE_TYPE_nvidia
+ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#else
+ IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT
+#endif
+!$ACC END PARALLEL
+
+#endif
+
+ END
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90
new file mode 100644
index 0000000..b6e637b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90
@@ -0,0 +1,135 @@
+! { dg-do run }
+
+! Exercise the async clause on parallel regions together with the wait
+! directive and the wait clause.  Four scenarios run inside data
+! regions; after each one the host checks every array element and calls
+! abort on a mismatch.
+program asyncwait
+  integer, parameter :: N = 64
+  real, allocatable :: a(:), b(:), c(:), d(:), e(:)
+  integer i
+
+  allocate (a(N))
+  allocate (b(N))
+  allocate (c(N))
+  allocate (d(N))
+  allocate (e(N))
+
+  ! Scenario 1: async without an argument, wait without an argument.
+  a(:) = 3.0
+  b(:) = 0.0
+
+  !$acc data copy (a(1:N)) copy (b(1:N))
+
+  !$acc parallel async
+  !$acc loop
+  do i = 1, N
+    b(i) = a(i)
+  end do
+  !$acc end parallel
+
+  !$acc wait
+  !$acc end data
+
+  do i = 1, N
+    if (a(i) .ne. 3.0) call abort
+    if (b(i) .ne. 3.0) call abort
+  end do
+
+  ! Scenario 2: the same copy on async queue 1 with a matching wait (1).
+  a(:) = 2.0
+  b(:) = 0.0
+
+  !$acc data copy (a(1:N)) copy (b(1:N))
+
+  !$acc parallel async (1)
+  !$acc loop
+  do i = 1, N
+    b(i) = a(i)
+  end do
+  !$acc end parallel
+
+  !$acc wait (1)
+  !$acc end data
+
+  do i = 1, N
+    if (a(i) .ne. 2.0) call abort
+    if (b(i) .ne. 2.0) call abort
+  end do
+
+  ! Scenario 3: three regions on queue 1.  With a = 3.0 they give
+  ! b = a*a = 9.0, c = 4.0 and d = 1.0.
+  a(:) = 3.0
+  b(:) = 0.0
+  c(:) = 0.0
+  d(:) = 0.0
+
+  !$acc data copy (a(1:N)) copy (b(1:N)) copy (c(1:N)) copy (d(1:N))
+
+  !$acc parallel async (1)
+  do i = 1, N
+    b(i) = (a(i) * a(i) * a(i)) / a(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel async (1)
+  do i = 1, N
+    c(i) = (a(i) * 4) / a(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel async (1)
+  !$acc loop
+  do i = 1, N
+    d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
+  end do
+  !$acc end parallel
+
+  !$acc wait (1)
+  !$acc end data
+
+  do i = 1, N
+    if (a(i) .ne. 3.0) call abort
+    if (b(i) .ne. 9.0) call abort
+    if (c(i) .ne. 4.0) call abort
+    if (d(i) .ne. 1.0) call abort
+  end do
+
+  ! Scenario 4: as scenario 3 with a = 2.0, followed by a fourth region
+  ! carrying both wait (1) and async (1) that sums the results:
+  ! e = 2.0 + 4.0 + 4.0 + 1.0 = 11.0.
+  a(:) = 2.0
+  b(:) = 0.0
+  c(:) = 0.0
+  d(:) = 0.0
+  e(:) = 0.0
+
+  !$acc data copy (a(1:N), b(1:N), c(1:N), d(1:N), e(1:N))
+
+  !$acc parallel async (1)
+  do i = 1, N
+    b(i) = (a(i) * a(i) * a(i)) / a(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel async (1)
+  !$acc loop
+  do i = 1, N
+    c(i) = (a(i) * 4) / a(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel async (1)
+  !$acc loop
+  do i = 1, N
+    d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
+  end do
+  !$acc end parallel
+
+  !$acc parallel wait (1) async (1)
+  !$acc loop
+  do i = 1, N
+    e(i) = a(i) + b(i) + c(i) + d(i)
+  end do
+  !$acc end parallel
+
+  !$acc wait (1)
+  !$acc end data
+
+  do i = 1, N
+    if (a(i) .ne. 2.0) call abort
+    if (b(i) .ne. 4.0) call abort
+    if (c(i) .ne. 4.0) call abort
+    if (d(i) .ne. 1.0) call abort
+    if (e(i) .ne. 11.0) call abort
+  end do
+
+  ! Release the arrays explicitly, as the other asyncwait tests do.
+  deallocate (a)
+  deallocate (b)
+  deallocate (c)
+  deallocate (d)
+  deallocate (e)
+end program asyncwait
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90
new file mode 100644
index 0000000..bade52b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90
@@ -0,0 +1,40 @@
+! { dg-do run }
+
+! Two asynchronous parallel regions (queues 0 and 1) fill a and b; a
+! third region carrying a wait clause for both queues adds them into c,
+! which the host then checks.
+program parallel_wait
+  integer, parameter :: N = 64
+  real, allocatable :: a(:), b(:), c(:)
+  integer i
+
+  allocate (a(N), b(N), c(N))
+
+  !$acc parallel async (0)
+  !$acc loop
+  do i = 1, N
+    a(i) = 1
+  end do
+  !$acc end parallel
+
+  !$acc parallel async (1)
+  !$acc loop
+  do i = 1, N
+    b(i) = 1
+  end do
+  !$acc end parallel
+
+  !$acc parallel wait (0, 1)
+  !$acc loop
+  do i = 1, N
+    c(i) = a(i) + b(i)
+  end do
+  !$acc end parallel
+
+  ! Every element of c must be exactly 2.0.
+  if (any (c /= 2.0)) call abort
+
+  deallocate (a, b, c)
+end program parallel_wait
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90
new file mode 100644
index 0000000..d48dc11
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90
@@ -0,0 +1,42 @@
+! { dg-do run }
+
+! Two asynchronous parallel regions (queues 0 and 1) fill a and b; a
+! stand-alone wait directive for both queues precedes the region that
+! adds them into c, which the host then checks.
+program parallel_wait
+  integer, parameter :: N = 64
+  real, allocatable :: a(:), b(:), c(:)
+  integer i
+
+  allocate (a(N), b(N), c(N))
+
+  !$acc parallel async (0)
+  !$acc loop
+  do i = 1, N
+    a(i) = 1
+  end do
+  !$acc end parallel
+
+  !$acc parallel async (1)
+  !$acc loop
+  do i = 1, N
+    b(i) = 1
+  end do
+  !$acc end parallel
+
+  !$acc wait (0, 1)
+
+  !$acc parallel
+  !$acc loop
+  do i = 1, N
+    c(i) = a(i) + b(i)
+  end do
+  !$acc end parallel
+
+  ! Every element of c must be exactly 2.0.
+  if (any (c /= 2.0)) call abort
+
+  deallocate (a, b, c)
+end program parallel_wait
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-1.f90
new file mode 100644
index 0000000..4c07bc2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-1.f90
@@ -0,0 +1,27 @@
+! { dg-do run }
+
+! Fill a three-dimensional array in a loop nest collapsed with a
+! constant expression (4 - 1), then check it in a second nest that
+! collapses only two levels and carries an .or. reduction.
+program collapse1
+  integer :: i, j, k
+  integer :: a(1:3, 4:6, 5:7)
+  logical :: l
+
+  l = .false.
+  a = 0
+  !$acc parallel
+  !$acc loop collapse(4 - 1)
+  do i = 1, 3
+    do j = 4, 6
+      do k = 5, 7
+        a(i, j, k) = i + j + k
+      end do
+    end do
+  end do
+  !$acc loop collapse(2) reduction(.or.:l)
+  do i = 1, 3
+    do j = 4, 6
+      do k = 5, 7
+        if (a(i, j, k) /= i + j + k) l = .true.
+      end do
+    end do
+  end do
+  !$acc end parallel
+  ! l is set when any element differs from i + j + k.
+  if (l) call abort
+end program collapse1
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-2.f90
new file mode 100644
index 0000000..ca3b638
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-2.f90
@@ -0,0 +1,25 @@
+! { dg-do run }
+
+! Same computation as a plain collapse test, but the first loop nest is
+! written with three labelled DO statements sharing the terminating
+! label 164, and the second nest has a construct name (firstdo) on its
+! outermost loop.
+program collapse2
+ integer :: i, j, k, a(1:3, 4:6, 5:7)
+ logical :: l
+ l = .false.
+ a(:, :, :) = 0
+ !$acc parallel
+ !$acc loop collapse(4 - 1)
+ do 164 i = 1, 3
+ do 164 j = 4, 6
+ do 164 k = 5, 7
+ a(i, j, k) = i + j + k
+164 end do
+ ! Only two of the three loops below are collapsed; l records any
+ ! element that differs from i + j + k.
+ !$acc loop collapse(2) reduction(.or.:l)
+firstdo: do i = 1, 3
+ do j = 4, 6
+ do k = 5, 7
+ if (a(i, j, k) .ne. (i + j + k)) l = .true.
+ end do
+ end do
+ end do firstdo
+ !$acc end parallel
+ if (l) call abort
+end program collapse2
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-3.f90
new file mode 100644
index 0000000..50e6100
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-3.f90
@@ -0,0 +1,28 @@
+! { dg-do run }
+
+! Two collapse(3) loop nests that mix labelled DO statements, construct
+! names and plain DO loops.  The first nest stores 1 in every element of
+! a, the second stores 2; each result is checked with ANY.
+program collapse3
+ integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+ !$acc parallel
+ !$acc loop collapse(3)
+ do 115 k=1,3
+dokk: do kk=1,3
+ do kkk=1,3
+ a(k,kk,kkk) = 1
+ enddo
+ enddo dokk
+115 continue
+ !$acc end parallel
+ if (any(a(1:3,1:3,1:3).ne.1)) call abort
+
+ ! Here the outermost loop has both a construct name (dol) and a
+ ! label (120).
+ !$acc parallel
+ !$acc loop collapse(3)
+dol: do 120 l=1,3
+doll: do ll=1,3
+ do lll=1,3
+ a(l,ll,lll) = 2
+ enddo
+ enddo doll
+120 end do dol
+ !$acc end parallel
+ if (any(a(1:3,1:3,1:3).ne.2)) call abort
+end program collapse3
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-4.f90
new file mode 100644
index 0000000..41b66db
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-4.f90
@@ -0,0 +1,40 @@
+! { dg-do run }
+! Runs a collapse(3) loop nest under OpenACC and again on the host, then compares.
+! collapse3.f90:test1
+program collapse4
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ logical :: l, r
+ l = .false.  ! out-of-range flag for the OpenACC nest
+ r = .false.  ! out-of-range flag for the host reference nest
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ !$acc parallel
+ !$acc loop collapse (3) reduction (.or.:l)
+ do i = 2, 6
+ do j = -2, 4
+ do k = 13, 18
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ end do
+ end do
+ end do
+ !$acc end parallel
+ do i = 2, 6
+ do j = -2, 4
+ do k = 13, 18
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! guard with the host flag r, not l
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort  ! both nests must agree on the flag
+ do i = 2, 6
+ do j = -2, 4
+ do k = 13, 18
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+end program collapse4
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-5.f90
new file mode 100644
index 0000000..8c20f04
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-5.f90
@@ -0,0 +1,48 @@
+! { dg-do run }
+! collapse(3) nest with variable loop bounds, compared against a host run of the same nest.
+! collapse3.f90:test2
+program collapse5
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ integer :: v1, v2, v3, v4, v5, v6
+ logical :: l, r
+ l = .false.  ! out-of-range flag for the OpenACC nest
+ r = .false.  ! out-of-range flag for the host reference nest
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ v1 = 3
+ v2 = 6
+ v3 = -2
+ v4 = 4
+ v5 = 13
+ v6 = 18
+ !$acc parallel
+ !$acc loop collapse (3) reduction (.or.:l)
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ m = i * 100 + j * 10 + k  ! m is implicitly typed and never read afterwards
+ end do
+ end do
+ end do
+ !$acc end parallel
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! guard with the host flag r, not l
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort  ! both nests must agree on the flag
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+end program collapse5
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-6.f90
new file mode 100644
index 0000000..7404b91
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-6.f90
@@ -0,0 +1,50 @@
+! { dg-do run }
+! collapse(3) nest whose bounds and strides are all variables, checked against a host run.
+! collapse3.f90:test3
+program collapse6
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+ logical :: l, r
+ l = .false.  ! out-of-range flag for the OpenACC nest
+ r = .false.  ! out-of-range flag for the host reference nest
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ v1 = 3
+ v2 = 6
+ v3 = -2
+ v4 = 4
+ v5 = 13
+ v6 = 18
+ v7 = 1  ! v7, v8, v9 are the loop strides
+ v8 = 1
+ v9 = 1
+ !$acc parallel
+ !$acc loop collapse (3) reduction (.or.:l)
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ end do
+ end do
+ end do
+ !$acc end parallel
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.r) b(i, j, k) = b(i, j, k) + 1
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort  ! both nests must agree on the flag
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+end program collapse6
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-7.f90
new file mode 100644
index 0000000..12efd8c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-7.f90
@@ -0,0 +1,40 @@
+! { dg-do run }
+! collapse(3) nest with literal bounds; the final comparison covers the whole of a and b.
+! collapse3.f90:test4
+program collapse7
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ logical :: l, r
+ l = .false.  ! out-of-range flag for the OpenACC nest
+ r = .false.  ! out-of-range flag for the host reference nest
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ !$acc parallel
+ !$acc loop collapse (3) reduction (.or.:l)
+ do i = 2, 6
+ do j = -2, 4
+ do k = 13, 18
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ end do
+ end do
+ end do
+ !$acc end parallel
+ do i = 2, 6
+ do j = -2, 4
+ do k = 13, 18
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.r) b(i, j, k) = b(i, j, k) + 1
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort  ! both nests must agree on the flag
+ do i = 1, 7  ! full declared extents, including elements no loop touched
+ do j = -3, 5
+ do k = 12, 19
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+end program collapse7
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/collapse-8.f90 b/libgomp/testsuite/libgomp.oacc-fortran/collapse-8.f90
new file mode 100644
index 0000000..04fbcfe
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/collapse-8.f90
@@ -0,0 +1,47 @@
+! { dg-do run }
+! collapse(3) nest with variable loop bounds v1..v6, checked against a host run.
+! collapse3.f90:test5
+program collapse8
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ integer :: v1, v2, v3, v4, v5, v6
+ logical :: l, r
+ l = .false.  ! out-of-range flag for the OpenACC nest
+ r = .false.  ! out-of-range flag for the host reference nest
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ v1 = 3
+ v2 = 6
+ v3 = -2
+ v4 = 4
+ v5 = 13
+ v6 = 18
+ !$acc parallel
+ !$acc loop collapse (3) reduction (.or.:l)
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ end do
+ end do
+ end do
+ !$acc end parallel
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.r) b(i, j, k) = b(i, j, k) + 1
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort  ! both nests must agree on the flag
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+end program collapse8
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-1.f90
new file mode 100644
index 0000000..5e94e2d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-1.f90
@@ -0,0 +1,45 @@
+! { dg-do run }
+! Unstructured enter/exit data around a parallel region that copies a into b.
+program test
+ integer, parameter :: N = 8
+ real, allocatable :: a(:), b(:)
+
+ allocate (a(N))
+ allocate (b(N))
+
+ a(:) = 3.0
+ b(:) = 0.0
+
+ !$acc enter data copyin (a(1:N), b(1:N))
+
+ !$acc parallel
+ do i = 1, n
+ b(i) = a (i)
+ end do
+ !$acc end parallel
+
+ !$acc exit data copyout (a(1:N), b(1:N))
+! After copyout both arrays must hold the value a was initialised with.
+ do i = 1, n
+ if (a(i) .ne. 3.0) call abort
+ if (b(i) .ne. 3.0) call abort
+ end do
+! Second round with new values: the arrays are entered and exited again.
+ a(:) = 5.0
+ b(:) = 1.0
+
+ !$acc enter data copyin (a(1:N), b(1:N))
+
+ !$acc parallel
+ do i = 1, n
+ b(i) = a (i)
+ end do
+ !$acc end parallel
+
+ !$acc exit data copyout (a(1:N), b(1:N))
+
+ do i = 1, n
+ if (a(i) .ne. 5.0) call abort
+ if (b(i) .ne. 5.0) call abort
+ end do
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
new file mode 100644
index 0000000..8736c2a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
@@ -0,0 +1,31 @@
+! { dg-do run }
+! Two-dimensional variant: enter/exit data on rank-2 allocatable arrays.
+program test
+ integer, parameter :: N = 8
+ real, allocatable :: a(:,:), b(:,:)
+
+ allocate (a(N,N))
+ allocate (b(N,N))
+
+ a(:,:) = 3.0
+ b(:,:) = 0.0
+
+ !$acc enter data copyin (a(1:N,1:N), b(1:N,1:N))
+
+ !$acc parallel
+ do i = 1, n
+ do j = 1, n
+ b(j,i) = a (j,i)
+ end do
+ end do
+ !$acc end parallel
+
+ !$acc exit data copyout (a(1:N,1:N), b(1:N,1:N))
+! After copyout every element of both arrays must equal 3.0.
+ do i = 1, n
+ do j = 1, n
+ if (a(j,i) .ne. 3.0) call abort
+ if (b(j,i) .ne. 3.0) call abort
+ end do
+ end do
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90
new file mode 100644
index 0000000..9868cb0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90
@@ -0,0 +1,131 @@
+! { dg-do run }
+! Combines enter/exit data with async and wait clauses on parallel regions.
+program asyncwait
+ real, allocatable :: a(:), b(:), c(:), d(:), e(:)
+ integer i, N
+
+ N = 64
+
+ allocate (a(N))
+ allocate (b(N))
+ allocate (c(N))
+ allocate (d(N))
+ allocate (e(N))
+! Round 1: async and wait clauses without arguments.
+ a(:) = 3.0
+ b(:) = 0.0
+
+ !$acc enter data copyin (a(1:N)) copyin (b(1:N)) copyin (N) async
+
+ !$acc parallel async wait
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ !$acc wait
+ !$acc exit data copyout (a(1:N)) copyout (b(1:N))
+
+ do i = 1, N
+ if (a(i) .ne. 3.0) call abort
+ if (b(i) .ne. 3.0) call abort
+ end do
+! Round 2: the same copy, using async (1) and wait (1).
+ a(:) = 2.0
+ b(:) = 0.0
+
+ !$acc enter data copyin (a(1:N)) copyin (b(1:N)) async (1)
+
+ !$acc parallel async (1) wait (1)
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ !$acc wait (1)
+ !$acc exit data copyout (a(1:N)) copyout (b(1:N))
+
+ do i = 1, N
+ if (a(i) .ne. 2.0) call abort
+ if (b(i) .ne. 2.0) call abort
+ end do
+! Round 3: three parallel regions with async (1), then one wait (1).
+ a(:) = 3.0
+ b(:) = 0.0
+ c(:) = 0.0
+ d(:) = 0.0
+
+ !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) create (d(1:N))
+
+ !$acc parallel async (1)
+ do i = 1, N
+ b(i) = (a(i) * a(i) * a(i)) / a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel async (1)
+ do i = 1, N
+ c(i) = (a(i) * 4) / a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel async (1)
+ do i = 1, N
+ d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
+ end do
+ !$acc end parallel
+
+ !$acc wait (1)
+ !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) copyout (d(1:N))
+! With a = 3.0 the expected results are b = 9.0, c = 4.0, d = 1.0.
+ do i = 1, N
+ if (a(i) .ne. 3.0) call abort
+ if (b(i) .ne. 9.0) call abort
+ if (c(i) .ne. 4.0) call abort
+ if (d(i) .ne. 1.0) call abort
+ end do
+! Round 4: as round 3, plus a fourth region that sums a, b, c and d into e.
+ a(:) = 2.0
+ b(:) = 0.0
+ c(:) = 0.0
+ d(:) = 0.0
+ e(:) = 0.0
+
+ !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) create (d(1:N)) copyin (e(1:N))
+
+ !$acc parallel async (1)
+ do i = 1, N
+ b(i) = (a(i) * a(i) * a(i)) / a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel async (1)
+ do i = 1, N
+ c(i) = (a(i) * 4) / a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel async (1)
+ do i = 1, N
+ d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel wait (1) async (1)
+ do i = 1, N
+ e(i) = a(i) + b(i) + c(i) + d(i)
+ end do
+ !$acc end parallel
+
+ !$acc wait (1)
+ !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) copyout (d(1:N)) copyout (e(1:N))
+ !$acc exit data delete (N)
+! With a = 2.0 the expected results are b = 4.0, c = 4.0, d = 1.0, e = 11.0.
+ do i = 1, N
+ if (a(i) .ne. 2.0) call abort
+ if (b(i) .ne. 4.0) call abort
+ if (c(i) .ne. 4.0) call abort
+ if (d(i) .ne. 1.0) call abort
+ if (e(i) .ne. 11.0) call abort
+ end do
+end program asyncwait
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-4-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-4-2.f90
new file mode 100644
index 0000000..16a8598
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-4-2.f90
@@ -0,0 +1,138 @@
+! Copy of data-4.f90 with the self and host clauses of !$acc update exchanged.
+
+! { dg-do run }
+! Data stays on the device between rounds; values move via !$acc update.
+program asyncwait
+ real, allocatable :: a(:), b(:), c(:), d(:), e(:)
+ integer i, N
+
+ N = 64
+
+ allocate (a(N))
+ allocate (b(N))
+ allocate (c(N))
+ allocate (d(N))
+ allocate (e(N))
+! Round 1: a and b are entered here and not exited until the end of the program.
+ a(:) = 3.0
+ b(:) = 0.0
+
+ !$acc enter data copyin (a(1:N)) copyin (b(1:N)) copyin (N) async
+
+ !$acc parallel async wait
+ !$acc loop
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ !$acc update self (a(1:N), b(1:N)) async wait
+ !$acc wait
+
+ do i = 1, N
+ if (a(i) .ne. 3.0) call abort
+ if (b(i) .ne. 3.0) call abort
+ end do
+! Round 2: push new host values with update device, fetch results with update host.
+ a(:) = 2.0
+ b(:) = 0.0
+
+ !$acc update device (a(1:N), b(1:N)) async (1)
+
+ !$acc parallel async (1) wait (1)
+ !$acc loop
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ !$acc update host (a(1:N), b(1:N)) async (1) wait (1)
+ !$acc wait (1)
+
+ do i = 1, N
+ if (a(i) .ne. 2.0) call abort
+ if (b(i) .ne. 2.0) call abort
+ end do
+! Round 3: c and d are entered in addition; three regions run with async (1).
+ a(:) = 3.0
+ b(:) = 0.0
+ c(:) = 0.0
+ d(:) = 0.0
+
+ !$acc enter data copyin (c(1:N), d(1:N)) async (1)
+ !$acc update device (a(1:N), b(1:N)) async (1)
+
+ !$acc parallel async (1)
+ do i = 1, N
+ b(i) = (a(i) * a(i) * a(i)) / a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel async (1)
+ do i = 1, N
+ c(i) = (a(i) * 4) / a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel async (1)
+ do i = 1, N
+ d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
+ end do
+ !$acc end parallel
+
+ !$acc update self (a(1:N), b(1:N), c(1:N), d(1:N)) async (1) wait (1)
+
+ !$acc wait (1)
+! With a = 3.0 the expected results are b = 9.0, c = 4.0, d = 1.0.
+ do i = 1, N
+ if (a(i) .ne. 3.0) call abort
+ if (b(i) .ne. 9.0) call abort
+ if (c(i) .ne. 4.0) call abort
+ if (d(i) .ne. 1.0) call abort
+ end do
+! Round 4: e is entered in addition; a fourth region sums a, b, c and d into e.
+ a(:) = 2.0
+ b(:) = 0.0
+ c(:) = 0.0
+ d(:) = 0.0
+ e(:) = 0.0
+
+ !$acc enter data copyin (e(1:N)) async (1)
+ !$acc update device (a(1:N), b(1:N), c(1:N), d(1:N)) async (1)
+
+ !$acc parallel async (1)
+ do i = 1, N
+ b(i) = (a(i) * a(i) * a(i)) / a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel async (1)
+ do i = 1, N
+ c(i) = (a(i) * 4) / a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel async (1)
+ do i = 1, N
+ d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel wait (1) async (1)
+ do i = 1, N
+ e(i) = a(i) + b(i) + c(i) + d(i)
+ end do
+ !$acc end parallel
+
+ !$acc update self (a(1:N), b(1:N), c(1:N), d(1:N), e(1:N)) async (1) wait (1)
+ !$acc wait (1)
+ !$acc exit data delete (N, a(1:N), b(1:N), c(1:N), d(1:N), e(1:N))
+! With a = 2.0 the expected results are b = 4.0, c = 4.0, d = 1.0, e = 11.0.
+ do i = 1, N
+ if (a(i) .ne. 2.0) call abort
+ if (b(i) .ne. 4.0) call abort
+ if (c(i) .ne. 4.0) call abort
+ if (d(i) .ne. 1.0) call abort
+ if (e(i) .ne. 11.0) call abort
+ end do
+end program asyncwait
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-4.f90
new file mode 100644
index 0000000..f6886b0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-4.f90
@@ -0,0 +1,136 @@
+! { dg-do run }
+! Data stays on the device between rounds; values move via !$acc update.
+program asyncwait
+ real, allocatable :: a(:), b(:), c(:), d(:), e(:)
+ integer i, N
+
+ N = 64
+
+ allocate (a(N))
+ allocate (b(N))
+ allocate (c(N))
+ allocate (d(N))
+ allocate (e(N))
+! Round 1: a and b are entered here and not exited until the end of the program.
+ a(:) = 3.0
+ b(:) = 0.0
+
+ !$acc enter data copyin (a(1:N)) copyin (b(1:N)) copyin (N) async
+
+ !$acc parallel async wait
+ !$acc loop
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ !$acc update host (a(1:N), b(1:N)) async wait
+ !$acc wait
+
+ do i = 1, N
+ if (a(i) .ne. 3.0) call abort
+ if (b(i) .ne. 3.0) call abort
+ end do
+! Round 2: push new host values with update device, fetch results with update self.
+ a(:) = 2.0
+ b(:) = 0.0
+
+ !$acc update device (a(1:N), b(1:N)) async (1)
+
+ !$acc parallel async (1) wait (1)
+ !$acc loop
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ !$acc update self (a(1:N), b(1:N)) async (1) wait (1)
+ !$acc wait (1)
+
+ do i = 1, N
+ if (a(i) .ne. 2.0) call abort
+ if (b(i) .ne. 2.0) call abort
+ end do
+! Round 3: c and d are entered in addition; three regions run with async (1).
+ a(:) = 3.0
+ b(:) = 0.0
+ c(:) = 0.0
+ d(:) = 0.0
+
+ !$acc enter data copyin (c(1:N), d(1:N)) async (1)
+ !$acc update device (a(1:N), b(1:N)) async (1)
+
+ !$acc parallel async (1)
+ do i = 1, N
+ b(i) = (a(i) * a(i) * a(i)) / a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel async (1)
+ do i = 1, N
+ c(i) = (a(i) * 4) / a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel async (1)
+ do i = 1, N
+ d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
+ end do
+ !$acc end parallel
+
+ !$acc update host (a(1:N), b(1:N), c(1:N), d(1:N)) async (1) wait (1)
+
+ !$acc wait (1)
+! With a = 3.0 the expected results are b = 9.0, c = 4.0, d = 1.0.
+ do i = 1, N
+ if (a(i) .ne. 3.0) call abort
+ if (b(i) .ne. 9.0) call abort
+ if (c(i) .ne. 4.0) call abort
+ if (d(i) .ne. 1.0) call abort
+ end do
+! Round 4: e is entered in addition; a fourth region sums a, b, c and d into e.
+ a(:) = 2.0
+ b(:) = 0.0
+ c(:) = 0.0
+ d(:) = 0.0
+ e(:) = 0.0
+
+ !$acc enter data copyin (e(1:N)) async (1)
+ !$acc update device (a(1:N), b(1:N), c(1:N), d(1:N)) async (1)
+
+ !$acc parallel async (1)
+ do i = 1, N
+ b(i) = (a(i) * a(i) * a(i)) / a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel async (1)
+ do i = 1, N
+ c(i) = (a(i) * 4) / a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel async (1)
+ do i = 1, N
+ d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel wait (1) async (1)
+ do i = 1, N
+ e(i) = a(i) + b(i) + c(i) + d(i)
+ end do
+ !$acc end parallel
+
+ !$acc update host (a(1:N), b(1:N), c(1:N), d(1:N), e(1:N)) async (1) wait (1)
+ !$acc wait (1)
+ !$acc exit data delete (N, a(1:N), b(1:N), c(1:N), d(1:N), e(1:N))
+! With a = 2.0 the expected results are b = 4.0, c = 4.0, d = 1.0, e = 11.0.
+ do i = 1, N
+ if (a(i) .ne. 2.0) call abort
+ if (b(i) .ne. 4.0) call abort
+ if (c(i) .ne. 4.0) call abort
+ if (d(i) .ne. 1.0) call abort
+ if (e(i) .ne. 11.0) call abort
+ end do
+end program asyncwait
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-1.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-1.f
new file mode 100644
index 0000000..ac220ab
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-1.f
@@ -0,0 +1,17 @@
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+! Expected-failure test: I is mapped by ACC_COPYIN, then mapped again by DATA COPY.
+ IMPLICIT NONE
+ INCLUDE "openacc_lib.h"
+
+ INTEGER I
+
+ CALL ACC_COPYIN (I)
+! The second mapping of I below is what the expected error message refers to.
+!$ACC DATA COPY (I)
+ I = 0
+!$ACC END DATA
+
+ END
+
+! { dg-shouldfail "" }
+! { dg-output "Trying to map into device .* object when .* is already mapped" }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-2.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-2.f
new file mode 100644
index 0000000..2c5254b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-2.f
@@ -0,0 +1,16 @@
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+! Expected-failure test: nested DATA regions, the inner one maps I with COPYOUT.
+ IMPLICIT NONE
+
+ INTEGER I
+! The outer region uses PRESENT_OR_COPY; the inner COPYOUT maps I a second time.
+!$ACC DATA PRESENT_OR_COPY (I)
+!$ACC DATA COPYOUT (I)
+ I = 0
+!$ACC END DATA
+!$ACC END DATA
+
+ END
+
+! { dg-shouldfail "" }
+! { dg-output "Trying to map into device .* object when .* is already mapped" }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-3.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-3.f
new file mode 100644
index 0000000..c41de28
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-3.f
@@ -0,0 +1,15 @@
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+! Expected-failure test: ACC_COPYIN on I inside a DATA region that already maps I.
+ IMPLICIT NONE
+ INCLUDE "openacc_lib.h"
+
+ INTEGER I
+
+!$ACC DATA PRESENT_OR_COPY (I)
+ CALL ACC_COPYIN (I)
+!$ACC END DATA
+
+ END
+
+! { dg-shouldfail "" }
+! { dg-output "already mapped to" }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-4.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-4.f
new file mode 100644
index 0000000..f54bf58
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-4.f
@@ -0,0 +1,14 @@
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+! Expected-failure test: ACC_COPYIN on I after ACC_PRESENT_OR_COPYIN already mapped it.
+ IMPLICIT NONE
+ INCLUDE "openacc_lib.h"
+
+ INTEGER I
+
+ CALL ACC_PRESENT_OR_COPYIN (I)
+ CALL ACC_COPYIN (I)
+
+ END
+
+! { dg-shouldfail "" }
+! { dg-output "already mapped to" }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-5.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-5.f
new file mode 100644
index 0000000..9a3e94f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-5.f
@@ -0,0 +1,14 @@
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+! Expected-failure test: ACC_COPYIN on I after ENTER DATA CREATE already mapped it.
+ IMPLICIT NONE
+ INCLUDE "openacc_lib.h"
+
+ INTEGER I
+
+!$ACC ENTER DATA CREATE (I)
+ CALL ACC_COPYIN (I)
+
+ END
+
+! { dg-shouldfail "" }
+! { dg-output "already mapped to" }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-6.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-6.f
new file mode 100644
index 0000000..eaf5d98
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-6.f
@@ -0,0 +1,14 @@
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+! Expected-failure test: ENTER DATA CREATE on I after ACC_PRESENT_OR_COPYIN mapped it.
+ IMPLICIT NONE
+ INCLUDE "openacc_lib.h"
+
+ INTEGER I
+
+ CALL ACC_PRESENT_OR_COPYIN (I)
+!$ACC ENTER DATA CREATE (I)
+
+ END
+
+! { dg-shouldfail "" }
+! { dg-output "already mapped to" }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-7.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-7.f
new file mode 100644
index 0000000..d96bf0b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-7.f
@@ -0,0 +1,14 @@
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+! Expected-failure test: ACC_CREATE on I after ENTER DATA CREATE already mapped it.
+ IMPLICIT NONE
+ INCLUDE "openacc_lib.h"
+
+ INTEGER I
+
+!$ACC ENTER DATA CREATE (I)
+ CALL ACC_CREATE (I)
+
+ END
+
+! { dg-shouldfail "" }
+! { dg-output "already mapped to" }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-already-8.f b/libgomp/testsuite/libgomp.oacc-fortran/data-already-8.f
new file mode 100644
index 0000000..16da048
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-already-8.f
@@ -0,0 +1,16 @@
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+! Expected-failure test: PARALLEL COPYIN (I) inside a DATA region that CREATEs I.
+ IMPLICIT NONE
+
+ INTEGER I
+! The DATA region maps I first; the COPYIN clause on PARALLEL maps it a second time.
+!$ACC DATA CREATE (I)
+!$ACC PARALLEL COPYIN (I)
+ I = 0
+!$ACC END PARALLEL
+!$ACC END DATA
+
+ END
+
+! { dg-shouldfail "" }
+! { dg-output "Trying to map into device .* object when .* is already mapped" }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
new file mode 100644
index 0000000..a8f62e8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
@@ -0,0 +1,98 @@
+# This whole file adapted from libgomp.fortran/fortran.exp.
+# Runs every Fortran test in this directory once per OpenACC offload target.
+load_lib libgomp-dg.exp
+load_gcc_lib gcc-dg.exp
+load_gcc_lib gfortran-dg.exp
+
+global shlib_ext
+global ALWAYS_CFLAGS
+
+set shlib_ext [get_shlib_extension]
+set lang_library_path "../libgfortran/.libs"
+set lang_link_flags "-lgfortran"
+if [info exists lang_include_flags] then {
+ unset lang_include_flags
+}
+set lang_test_file_found 0
+set quadmath_library_path "../libquadmath/.libs"
+
+
+# Initialize dg.
+dg-init
+
+# Turn on OpenACC.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc"
+# Locate libgfortran in the build tree, or else rely on GFORTRAN_UNDER_TEST.
+if { $blddir != "" } {
+ set lang_source_re {^.*\.[fF](|90|95|03|08)$}
+ set lang_include_flags "-fintrinsic-modules-path=${blddir}"
+ # Look for a static libgfortran first.
+ if [file exists "${blddir}/${lang_library_path}/libgfortran.a"] {
+ set lang_test_file "${lang_library_path}/libgfortran.a"
+ set lang_test_file_found 1
+ # We may have a shared only build, so look for a shared libgfortran.
+ } elseif [file exists "${blddir}/${lang_library_path}/libgfortran.${shlib_ext}"] {
+ set lang_test_file "${lang_library_path}/libgfortran.${shlib_ext}"
+ set lang_test_file_found 1
+ } else {
+ puts "No libgfortran library found, will not execute fortran tests"
+ }
+} elseif [info exists GFORTRAN_UNDER_TEST] {
+ set lang_test_file_found 1
+ # Needs to exist for libgomp.exp.
+ set lang_test_file ""
+} else {
+ puts "GFORTRAN_UNDER_TEST not defined, will not execute fortran tests"
+}
+# Nothing below runs unless one of the branches above set lang_test_file_found.
+if { $lang_test_file_found } {
+ # Gather a list of all tests.
+ set tests [lsort [find $srcdir/$subdir *.\[fF\]{,90,95,03,08}]]
+
+ if { $blddir != "" } {
+ if { [file exists "${blddir}/${quadmath_library_path}/libquadmath.a"]
+ || [file exists "${blddir}/${quadmath_library_path}/libquadmath.${shlib_ext}"] } {
+ lappend ALWAYS_CFLAGS "ldflags=-L${blddir}/${quadmath_library_path}/"
+ # Allow for spec substitution.
+ lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/${quadmath_library_path}/"
+ set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}:${blddir}/${quadmath_library_path}"
+ } else {
+ set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}"
+ }
+ } else {
+ set ld_library_path "$always_ld_library_path"
+ }
+ append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST]
+ set_ld_library_path_env_vars
+
+ # Test OpenACC with available accelerators.
+ foreach offload_target_openacc $offload_targets_s_openacc {
+ set tagopt "-DACC_DEVICE_TYPE_$offload_target_openacc=1"
+
+ switch $offload_target_openacc {
+ host {
+ set acc_mem_shared 1
+ }
+ host_nonshm {
+ set acc_mem_shared 0
+ }
+ nvidia {
+ set acc_mem_shared 0
+ }
+ default {
+ set acc_mem_shared 0
+ }
+ }
+ set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared"
+ # The tests' runtime device selection follows this environment variable.
+ setenv ACC_DEVICE_TYPE $offload_target_openacc
+
+ # For Fortran we're doing torture testing: Fortran has far more tests with
+ # arrays etc., so testing just -O0 or -O2 is insufficient; that is
+ # typically not the case for C/C++.
+ gfortran-dg-runtest $tests "$tagopt" ""
+ }
+}
+
+# All done.
+dg-finish
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90
new file mode 100644
index 0000000..51dc452
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90
@@ -0,0 +1,13 @@
+use openacc
+! Exercises the device query and selection routines of the openacc module on the host device.
+if (acc_get_num_devices (acc_device_host) .ne. 1) call abort
+call acc_set_device_type (acc_device_host)
+if (acc_get_device_type () .ne. acc_device_host) call abort
+call acc_set_device_num (0, acc_device_host)
+if (acc_get_device_num (acc_device_host) .ne. 0) call abort
+call acc_shutdown (acc_device_host)
+! An explicit init/shutdown cycle after the shutdown above.
+call acc_init (acc_device_host)
+call acc_shutdown (acc_device_host)
+
+end
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90
new file mode 100644
index 0000000..a54d6a7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90
@@ -0,0 +1,82 @@
+! { dg-do run }
+! Checks acc_copyin / acc_is_present on 3-D integer, complex and real arrays (openacc_lib.h).
+program main
+ implicit none
+ include "openacc_lib.h"
+
+ integer, target :: a_3d_i(10, 10, 10)
+ complex a_3d_c(10, 10, 10)
+ real a_3d_r(10, 10, 10)
+
+ integer i, j, k
+ complex c
+ real r
+ integer, parameter :: i_size = sizeof (i)
+ integer, parameter :: c_size = sizeof (c)
+ integer, parameter :: r_size = sizeof (r)
+! Without an nvidia device there is nothing to test: leave without aborting.
+ if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+ call acc_init (acc_device_nvidia)
+
+ call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+ call acc_copyin (a_3d_i)
+ call acc_copyin (a_3d_c)
+ call acc_copyin (a_3d_r)
+
+ if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+! Per-element presence check; each call passes the byte size of that array's element type.
+ do i = 1, 10
+ do j = 1, 10
+ do k = 1, 10
+ if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort
+ end do
+ end do
+ end do
+
+ call acc_shutdown (acc_device_nvidia)
+
+contains
+! Fills the three arrays with index-derived values, or zeroes them when clear is true.
+ subroutine set3d (clear, a_i, a_c, a_r)
+ logical clear
+ integer, dimension (:,:,:), intent (inout) :: a_i
+ complex, dimension (:,:,:), intent (inout) :: a_c
+ real, dimension (:,:,:), intent (inout) :: a_r
+
+ integer i, j, k
+ integer lb1, ub1, lb2, ub2, lb3, ub3
+! The bounds of a_i are used for all three arrays.
+ lb1 = lbound (a_i, 1)
+ ub1 = ubound (a_i, 1)
+
+ lb2 = lbound (a_i, 2)
+ ub2 = ubound (a_i, 2)
+
+ lb3 = lbound (a_i, 3)
+ ub3 = ubound (a_i, 3)
+
+ do i = lb1, ub1
+ do j = lb2, ub2
+ do k = lb3, ub3
+ if (clear) then
+ a_i(i, j, k) = 0
+ a_c(i, j, k) = cmplx (0.0, 0.0)
+ a_r(i, j, k) = 0.0
+ else
+ a_i(i, j, k) = i
+ a_c(i, j, k) = cmplx (i, j)
+ a_r(i, j, k) = i
+ end if
+ end do
+ end do
+ end do
+
+ end subroutine
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f b/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f
new file mode 100644
index 0000000..a9d70b2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f
@@ -0,0 +1,13 @@
+ USE OPENACC
+! Fixed-form test of the device query and selection routines via the OPENACC module.
+ IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT
+ CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST)
+ IF (ACC_GET_DEVICE_TYPE () .NE. ACC_DEVICE_HOST) CALL ABORT
+ CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST)
+ IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT
+ CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+! An explicit init/shutdown cycle after the shutdown above.
+ CALL ACC_INIT (ACC_DEVICE_HOST)
+ CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+ END
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f b/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f
new file mode 100644
index 0000000..56d2cd2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f
@@ -0,0 +1,13 @@
+ INCLUDE "openacc_lib.h"
+! Fixed-form test of the device query and selection routines via openacc_lib.h.
+ IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT
+ CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST)
+ IF (ACC_GET_DEVICE_TYPE () .NE. ACC_DEVICE_HOST) CALL ABORT
+ CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST)
+ IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT
+ CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+! An explicit init/shutdown cycle after the shutdown above.
+ CALL ACC_INIT (ACC_DEVICE_HOST)
+ CALL ACC_SHUTDOWN (ACC_DEVICE_HOST)
+
+ END
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90
new file mode 100644
index 0000000..3a2b661
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90
@@ -0,0 +1,35 @@
+! { dg-do run }
+! Host-device checks of the device, async-test and wait routines from the openacc module.
+program main
+ use openacc
+ implicit none
+
+ integer n
+
+ if (acc_get_num_devices (acc_device_host) .ne. 1) call abort
+
+ if (acc_get_num_devices (acc_device_none) .ne. 0) call abort
+
+ call acc_init (acc_device_host)
+
+ if (acc_get_device_type () .ne. acc_device_host) call abort
+
+ call acc_set_device_type (acc_device_host)
+
+ if (acc_get_device_type () .ne. acc_device_host) call abort
+
+ n = 0
+
+ call acc_set_device_num (n, acc_device_host)
+
+ if (acc_get_device_num (acc_device_host) .ne. 0) call abort
+! No asynchronous work was started, so the test on n must already report completion.
+ if (.NOT. acc_async_test (n) ) call abort
+
+ call acc_wait (n)
+
+ call acc_wait_all ()
+
+ call acc_shutdown (acc_device_host)
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90
new file mode 100644
index 0000000..e68eb89
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90
@@ -0,0 +1,31 @@
+! { dg-do run }
+! Selects nvidia device 0, and device 1 when more than one exists (openacc module).
+program main
+ use openacc
+ implicit none
+
+ integer n
+! Without an nvidia device there is nothing to test: leave without aborting.
+ if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+ call acc_init (acc_device_nvidia)
+
+ n = 0
+
+ call acc_set_device_num (n, acc_device_nvidia)
+
+ if (acc_get_device_num (acc_device_nvidia) .ne. 0) call abort
+
+ if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then
+
+ n = 1
+
+ call acc_set_device_num (n, acc_device_nvidia)
+
+ if (acc_get_device_num (acc_device_nvidia) .ne. 1) call abort
+
+ end if
+
+ call acc_shutdown (acc_device_nvidia)
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90
new file mode 100644
index 0000000..401ad66
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90
@@ -0,0 +1,35 @@
+! { dg-do run }
+! Host-device checks of the device, async-test and wait routines via openacc_lib.h.
+program main
+ implicit none
+ include "openacc_lib.h"
+
+ integer n
+
+ if (acc_get_num_devices (acc_device_host) .ne. 1) call abort
+
+ if (acc_get_num_devices (acc_device_none) .ne. 0) call abort
+
+ call acc_init (acc_device_host)
+
+ if (acc_get_device_type () .ne. acc_device_host) call abort
+
+ call acc_set_device_type (acc_device_host)
+
+ if (acc_get_device_type () .ne. acc_device_host) call abort
+
+ n = 0
+
+ call acc_set_device_num (n, acc_device_host)
+
+ if (acc_get_device_num (acc_device_host) .ne. 0) call abort
+! No asynchronous work was started, so the test on n must already report completion.
+ if (.NOT. acc_async_test (n) ) call abort
+
+ call acc_wait (n)
+
+ call acc_wait_all ()
+
+ call acc_shutdown (acc_device_host)
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90
new file mode 100644
index 0000000..422df53
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90
@@ -0,0 +1,31 @@
+! { dg-do run }
+! Selects nvidia device 0, and device 1 when more than one exists (openacc_lib.h).
+program main
+ implicit none
+ include "openacc_lib.h"
+
+ integer n
+! Without an nvidia device there is nothing to test: leave without aborting.
+ if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+ call acc_init (acc_device_nvidia)
+
+ n = 0
+
+ call acc_set_device_num (n, acc_device_nvidia)
+
+ if (acc_get_device_num (acc_device_nvidia) .ne. 0) call abort
+
+ if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then
+
+ n = 1
+
+ call acc_set_device_num (n, acc_device_nvidia)
+
+ if (acc_get_device_num (acc_device_nvidia) .ne. 1) call abort
+
+ end if
+
+ call acc_shutdown (acc_device_nvidia)
+
+end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90
new file mode 100644
index 0000000..ad758b2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90
@@ -0,0 +1,83 @@
+! { dg-do run }
+
+program main
+ use openacc
+ use iso_c_binding
+ implicit none
+ ! Copy three 10x10x10 arrays in with acc_copyin, then query acc_is_present on them.
+ integer, target :: a_3d_i(10, 10, 10)
+ complex a_3d_c(10, 10, 10)
+ real a_3d_r(10, 10, 10)
+
+ integer i, j, k
+ complex c
+ real r
+ integer, parameter :: i_size = sizeof (i)
+ integer, parameter :: c_size = sizeof (c)
+ integer, parameter :: r_size = sizeof (r)
+ ! Nothing to test when no NVIDIA device is reported.
+ if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit
+
+ call acc_init (acc_device_nvidia)
+
+ call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r)
+
+ call acc_copyin (a_3d_i)
+ call acc_copyin (a_3d_c)
+ call acc_copyin (a_3d_r)
+ ! After the copy-in each array must be reported present as a whole ...
+ if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort
+ if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort
+ ! ... and element by element, each probed with the byte size of its own type.
+ do i = 1, 10
+   do j = 1, 10
+     do k = 1, 10
+       if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort
+       if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort
+       if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort
+     end do
+   end do
+ end do
+
+ call acc_shutdown (acc_device_nvidia)
+
+contains
+ ! Fill all three arrays: zeros if CLEAR, else index-derived values (a_i = i, a_c = (i,j), a_r = i).
+ subroutine set3d (clear, a_i, a_c, a_r)
+   logical, intent (in) :: clear
+   integer, dimension (:,:,:), intent (inout) :: a_i
+   complex, dimension (:,:,:), intent (inout) :: a_c
+   real, dimension (:,:,:), intent (inout) :: a_r
+
+   integer i, j, k
+   integer lb1, ub1, lb2, ub2, lb3, ub3
+   ! The bounds of a_i drive the loops over all three arrays.
+   lb1 = lbound (a_i, 1)
+   ub1 = ubound (a_i, 1)
+
+   lb2 = lbound (a_i, 2)
+   ub2 = ubound (a_i, 2)
+
+   lb3 = lbound (a_i, 3)
+   ub3 = ubound (a_i, 3)
+
+   do i = lb1, ub1
+     do j = lb2, ub2
+       do k = lb3, ub3
+         if (clear) then
+           a_i(i, j, k) = 0
+           a_c(i, j, k) = cmplx (0.0, 0.0)
+           a_r(i, j, k) = 0.0
+         else
+           a_i(i, j, k) = i
+           a_c(i, j, k) = cmplx (i, j)
+           a_r(i, j, k) = i
+         end if
+       end do
+     end do
+   end do
+
+ end subroutine set3d
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90
new file mode 100644
index 0000000..082dd8a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90
@@ -0,0 +1,97 @@
+program map
+ implicit none
+ integer, parameter :: n = 20
+ integer :: i, a(n), b(n)
+ ! A is the array under test; B holds the expected values once it is filled below.
+ a(:) = 0
+ b(:) = 0
+
+ ! COPY: A is written in the region and must come back with a(i) = i.
+
+ !$acc parallel copy (a)
+ !$acc loop
+ do i = 1, n
+   a(i) = i
+ end do
+ !$acc end parallel
+
+ do i = 1, n
+   b(i) = i
+ end do
+
+ call check (a, b, n)
+
+ ! COPYOUT: A is cleared on the host, so the values must come from the region.
+
+ a(:) = 0
+
+ !$acc parallel copyout (a)
+ !$acc loop
+ do i = 1, n
+   a(i) = i
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! COPYIN: the region reads B, so A only matches if B really was copied in.
+
+ a(:) = 0
+
+ !$acc parallel copyout (a) copyin (b)
+ !$acc loop
+ do i = 1, n
+   a(i) = b(i)
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPY: A is cleared first so that stale values cannot satisfy the check.
+
+ a(:) = 0
+
+ !$acc parallel pcopy (a)
+ !$acc loop
+ do i = 1, n
+   a(i) = i
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPYOUT
+
+ a(:) = 0
+
+ !$acc parallel pcopyout (a)
+ !$acc loop
+ do i = 1, n
+   a(i) = i
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPYIN: as for COPYIN, the region reads B.
+
+ a(:) = 0
+
+ !$acc parallel pcopyout (a) pcopyin (b)
+ !$acc loop
+ do i = 1, n
+   a(i) = b(i)
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+end program map
+
+subroutine check (a, b, n)
+ ! Abort unless the N elements of A and B agree pairwise.
+ integer :: n, a(n), b(n)
+
+ ! A single mismatch anywhere is fatal.
+ if (any (a /= b)) call abort
+
+end subroutine check
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f
new file mode 100644
index 0000000..db3c6b1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-1.f
@@ -0,0 +1,9 @@
+! { dg-do run }
+
+      program main
+      implicit none
+      include "openacc_lib.h"
+!     openacc_version is provided by openacc_lib.h and must equal 201306.
+      if (openacc_version .ne. 201306) call abort
+
+      end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90
new file mode 100644
index 0000000..a14ecdd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/openacc_version-2.f90
@@ -0,0 +1,9 @@
+! { dg-do run }
+
+program main
+ use openacc
+ implicit none
+ ! openacc_version, as provided by the openacc module, must equal 201306.
+ if (openacc_version /= 201306) call abort
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90
new file mode 100644
index 0000000..a5e1fcb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90
@@ -0,0 +1,21 @@
+! PR middle-end/63247
+
+program test
+ implicit none
+
+ integer(kind=2) :: a(4)
+
+ a = 10
+
+ ! Only elements 2..4 appear in the copy clause; element 1 is not listed.
+ !$acc parallel copy(a(2:4))
+ a(2) = 52
+ a(3) = 53
+ a(4) = 54
+ !$acc end parallel
+
+ ! a(1) must keep its initial value; a(2:4) must carry the new values.
+ if (a(1) /= 10) call abort
+ if (any (a(2:4) /= (/ 52, 53, 54 /))) call abort
+
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90
new file mode 100644
index 0000000..1a1d4c7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90
@@ -0,0 +1,229 @@
+! { dg-do run }
+
+program test
+ implicit none
+ integer, allocatable :: a1(:)
+ integer, allocatable :: b1(:)
+ integer, allocatable :: c1(:)
+ integer, allocatable :: b2(:,:)
+ integer, allocatable :: c3(:,:,:)
+ ! Case 1: rank 1, bounds 1:5, the whole array named in a copy clause.
+ allocate (a1(5))
+ if (.not.allocated (a1)) call abort()
+
+ a1 = 10
+
+ !$acc parallel copy(a1(1:5))
+ a1(1) = 1
+ a1(2) = 2
+ a1(3) = 3
+ a1(4) = 4
+ a1(5) = 5
+ !$acc end parallel
+
+ if (a1(1) .ne. 1) call abort
+ if (a1(2) .ne. 2) call abort
+ if (a1(3) .ne. 3) call abort
+ if (a1(4) .ne. 4) call abort
+ if (a1(5) .ne. 5) call abort
+
+ deallocate(a1)
+ ! Case 2: rank 1 again, now allocated with bounds 0:4.
+ allocate (a1(0:4))
+ if (.not.allocated (a1)) call abort()
+
+ a1 = 10
+
+ !$acc parallel copy(a1(0:4))
+ a1(0) = 1
+ a1(1) = 2
+ a1(2) = 3
+ a1(3) = 4
+ a1(4) = 5
+ !$acc end parallel
+
+ if (a1(0) .ne. 1) call abort
+ if (a1(1) .ne. 2) call abort
+ if (a1(2) .ne. 3) call abort
+ if (a1(3) .ne. 4) call abort
+ if (a1(4) .ne. 5) call abort
+
+ deallocate(a1)
+ ! Case 3: rank 2, bounds 1:5 in both dimensions; only the diagonal is written and checked.
+ allocate (b2(5,5))
+ if (.not.allocated (b2)) call abort()
+
+ b2 = 11
+
+ !$acc parallel copy(b2(1:5,1:5))
+ b2(1,1) = 1
+ b2(2,2) = 2
+ b2(3,3) = 3
+ b2(4,4) = 4
+ b2(5,5) = 5
+ !$acc end parallel
+
+ if (b2(1,1) .ne. 1) call abort
+ if (b2(2,2) .ne. 2) call abort
+ if (b2(3,3) .ne. 3) call abort
+ if (b2(4,4) .ne. 4) call abort
+ if (b2(5,5) .ne. 5) call abort
+
+ deallocate(b2)
+ ! Case 4: rank 2 with bounds 0:4 in both dimensions.
+ allocate (b2(0:4,0:4))
+ if (.not.allocated (b2)) call abort()
+
+ b2 = 11
+
+ !$acc parallel copy(b2(0:4,0:4))
+ b2(0,0) = 1
+ b2(1,1) = 2
+ b2(2,2) = 3
+ b2(3,3) = 4
+ b2(4,4) = 5
+ !$acc end parallel
+
+ if (b2(0,0) .ne. 1) call abort
+ if (b2(1,1) .ne. 2) call abort
+ if (b2(2,2) .ne. 3) call abort
+ if (b2(3,3) .ne. 4) call abort
+ if (b2(4,4) .ne. 5) call abort
+
+ deallocate(b2)
+ ! Case 5: rank 3, bounds 1:5 in every dimension; only the diagonal is written and checked.
+ allocate (c3(5,5,5))
+ if (.not.allocated (c3)) call abort()
+
+ c3 = 12
+
+ !$acc parallel copy(c3(1:5,1:5,1:5))
+ c3(1,1,1) = 1
+ c3(2,2,2) = 2
+ c3(3,3,3) = 3
+ c3(4,4,4) = 4
+ c3(5,5,5) = 5
+ !$acc end parallel
+
+ if (c3(1,1,1) .ne. 1) call abort
+ if (c3(2,2,2) .ne. 2) call abort
+ if (c3(3,3,3) .ne. 3) call abort
+ if (c3(4,4,4) .ne. 4) call abort
+ if (c3(5,5,5) .ne. 5) call abort
+
+ deallocate(c3)
+ ! Case 6: rank 3 with bounds 0:4 in every dimension.
+ allocate (c3(0:4,0:4,0:4))
+ if (.not.allocated (c3)) call abort()
+
+ c3 = 12
+
+ !$acc parallel copy(c3(0:4,0:4,0:4))
+ c3(0,0,0) = 1
+ c3(1,1,1) = 2
+ c3(2,2,2) = 3
+ c3(3,3,3) = 4
+ c3(4,4,4) = 5
+ !$acc end parallel
+
+ if (c3(0,0,0) .ne. 1) call abort
+ if (c3(1,1,1) .ne. 2) call abort
+ if (c3(2,2,2) .ne. 3) call abort
+ if (c3(3,3,3) .ne. 4) call abort
+ if (c3(4,4,4) .ne. 5) call abort
+
+ deallocate(c3)
+ ! Case 7: three rank-1 arrays (bounds 1:5) in copyin, create and copyout clauses.
+ allocate (a1(5))
+ if (.not.allocated (a1)) call abort()
+
+ allocate (b1(5))
+ if (.not.allocated (b1)) call abort()
+
+ allocate (c1(5))
+ if (.not.allocated (c1)) call abort()
+
+ a1 = 10
+ b1 = 3
+ c1 = 7
+
+ !$acc parallel copyin(a1(1:5)) create(c1(1:5)) copyout(b1(1:5))
+ c1(1) = a1(1)
+ c1(2) = a1(2)
+ c1(3) = a1(3)
+ c1(4) = a1(4)
+ c1(5) = a1(5)
+
+ b1(1) = c1(1)
+ b1(2) = c1(2)
+ b1(3) = c1(3)
+ b1(4) = c1(4)
+ b1(5) = c1(5)
+ !$acc end parallel
+
+ if (b1(1) .ne. 10) call abort
+ if (b1(2) .ne. 10) call abort
+ if (b1(3) .ne. 10) call abort
+ if (b1(4) .ne. 10) call abort
+ if (b1(5) .ne. 10) call abort
+
+ deallocate(a1)
+ deallocate(b1)
+ deallocate(c1)
+ ! Case 8: the same three-clause test with bounds 0:4.
+ allocate (a1(0:4))
+ if (.not.allocated (a1)) call abort()
+
+ allocate (b1(0:4))
+ if (.not.allocated (b1)) call abort()
+
+ allocate (c1(0:4))
+ if (.not.allocated (c1)) call abort()
+
+ a1 = 10
+ b1 = 3
+ c1 = 7
+
+ !$acc parallel copyin(a1(0:4)) create(c1(0:4)) copyout(b1(0:4))
+ c1(0) = a1(0)
+ c1(1) = a1(1)
+ c1(2) = a1(2)
+ c1(3) = a1(3)
+ c1(4) = a1(4)
+
+ b1(0) = c1(0)
+ b1(1) = c1(1)
+ b1(2) = c1(2)
+ b1(3) = c1(3)
+ b1(4) = c1(4)
+ !$acc end parallel
+
+ if (b1(0) .ne. 10) call abort
+ if (b1(1) .ne. 10) call abort
+ if (b1(2) .ne. 10) call abort
+ if (b1(3) .ne. 10) call abort
+ if (b1(4) .ne. 10) call abort
+
+ deallocate(a1)
+ deallocate(b1)
+ deallocate(c1)
+ ! Case 9: only a1(2:3) is named in the copy clause; the other elements must keep 10.
+ allocate (a1(5))
+ if (.not.allocated (a1)) call abort()
+
+ a1 = 10
+
+ !$acc parallel copy(a1(2:3))
+ a1(2) = 2
+ a1(3) = 3
+ !$acc end parallel
+
+ if (a1(1) .ne. 10) call abort
+ if (a1(2) .ne. 2) call abort
+ if (a1(3) .ne. 3) call abort
+ if (a1(4) .ne. 10) call abort
+ if (a1(5) .ne. 10) call abort
+
+ deallocate(a1)
+
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90
new file mode 100644
index 0000000..89e7fe7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90
@@ -0,0 +1,225 @@
+! { dg-do run }
+
+! Integer reductions
+
+program reduction_1
+ implicit none
+
+ integer, parameter :: n = 10, vl = 2
+ integer :: i, vresult, result
+ logical :: lresult, lvresult
+ integer, dimension (n) :: array
+
+ do i = 1, n
+ array(i) = i
+ end do
+
+ result = 0
+ vresult = 0
+
+ ! '+' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(+:result)
+ do i = 1, n
+ result = result + array(i)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = vresult + array(i)
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 1
+ vresult = 1
+
+ ! '*' reductions: both products start at 1, the multiplicative identity.
+ ! (Starting from 0 would make every product 0, whatever the array holds.)
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(*:result)
+ do i = 1, n
+   result = result * array(i)
+ end do
+ !$acc end parallel
+
+ ! Verify the results against the same product computed on the host
+ do i = 1, n
+   vresult = vresult * array(i)
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 0
+ vresult = 0
+
+ ! 'max' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(max:result)
+ do i = 1, n
+ result = max (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = max (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 1
+ vresult = 1
+
+ ! 'min' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(min:result)
+ do i = 1, n
+ result = min (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = min (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 1
+ vresult = 1
+
+ ! 'iand' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(iand:result)
+ do i = 1, n
+ result = iand (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = iand (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 1
+ vresult = 1
+
+ ! 'ior' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(ior:result)
+ do i = 1, n
+ result = ior (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = ior (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 0
+ vresult = 0
+
+ ! 'ieor' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(ieor:result)
+ do i = 1, n
+ result = ieor (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = ieor (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.and.' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(.and.:lresult)
+ do i = 1, n
+   lresult = lresult .and. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results: recompute on the host, then compare the logical pair
+ do i = 1, n
+   lvresult = lvresult .and. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.or.' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(.or.:lresult)
+ do i = 1, n
+   lresult = lresult .or. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results: recompute on the host, then compare the logical pair
+ do i = 1, n
+   lvresult = lvresult .or. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.eqv.' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(.eqv.:lresult)
+ do i = 1, n
+   lresult = lresult .eqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results: recompute on the host, then compare the logical pair
+ do i = 1, n
+   lvresult = lvresult .eqv. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.neqv.' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(.neqv.:lresult)
+ do i = 1, n
+   lresult = lresult .neqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results: recompute on the host, then compare the logical pair
+ do i = 1, n
+   lvresult = lvresult .neqv. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+end program reduction_1
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90
new file mode 100644
index 0000000..d3659c9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90
@@ -0,0 +1,170 @@
+! { dg-do run }
+
+! real reductions
+
+program reduction_2
+ implicit none
+
+ integer, parameter :: n = 10, vl = 2
+ integer :: i
+ real, parameter :: e = .001
+ real :: vresult, result
+ logical :: lresult, lvresult
+ real, dimension (n) :: array
+
+ do i = 1, n
+ array(i) = i
+ end do
+
+ result = 0
+ vresult = 0
+
+ ! '+' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(+:result)
+ do i = 1, n
+ result = result + array(i)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = vresult + array(i)
+ end do
+
+ if (abs (result - vresult) .ge. e) call abort
+
+ result = 1
+ vresult = 1
+
+ ! '*' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(*:result)
+ do i = 1, n
+ result = result * array(i)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = vresult * array(i)
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 0
+ vresult = 0
+
+ ! 'max' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(max:result)
+ do i = 1, n
+ result = max (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = max (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 1
+ vresult = 1
+
+ ! 'min' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(min:result)
+ do i = 1, n
+ result = min (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = min (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.and.' reductions (the logical pair is initialised above before it is read)
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(.and.:lresult)
+ do i = 1, n
+   lresult = lresult .and. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results: recompute on the host, then compare the logical pair
+ do i = 1, n
+   lvresult = lvresult .and. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.or.' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(.or.:lresult)
+ do i = 1, n
+   lresult = lresult .or. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results: recompute on the host, then compare the logical pair
+ do i = 1, n
+   lvresult = lvresult .or. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.eqv.' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(.eqv.:lresult)
+ do i = 1, n
+   lresult = lresult .eqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results: recompute on the host, then compare the logical pair
+ do i = 1, n
+   lvresult = lvresult .eqv. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.neqv.' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(.neqv.:lresult)
+ do i = 1, n
+   lresult = lresult .neqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results: recompute on the host, then compare the logical pair
+ do i = 1, n
+   lvresult = lvresult .neqv. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+end program reduction_2
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90
new file mode 100644
index 0000000..2b8005d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90
@@ -0,0 +1,170 @@
+! { dg-do run }
+
+! double precision reductions
+
+program reduction_3
+ implicit none
+
+ integer, parameter :: n = 10, vl = 2
+ integer :: i
+ double precision, parameter :: e = .001
+ double precision :: vresult, result
+ logical :: lresult, lvresult
+ double precision, dimension (n) :: array
+
+ do i = 1, n
+ array(i) = i
+ end do
+
+ result = 0
+ vresult = 0
+
+ ! '+' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(+:result)
+ do i = 1, n
+ result = result + array(i)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = vresult + array(i)
+ end do
+
+ if (abs (result - vresult) .ge. e) call abort
+
+ result = 1
+ vresult = 1
+
+ ! '*' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(*:result)
+ do i = 1, n
+ result = result * array(i)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = vresult * array(i)
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 0
+ vresult = 0
+
+ ! 'max' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(max:result)
+ do i = 1, n
+ result = max (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = max (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ result = 1
+ vresult = 1
+
+ ! 'min' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(min:result)
+ do i = 1, n
+ result = min (result, array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = min (vresult, array(i))
+ end do
+
+ if (result.ne.vresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.and.' reductions (the logical pair is initialised above before it is read)
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(.and.:lresult)
+ do i = 1, n
+   lresult = lresult .and. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results: recompute on the host, then compare the logical pair
+ do i = 1, n
+   lvresult = lvresult .and. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.or.' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(.or.:lresult)
+ do i = 1, n
+   lresult = lresult .or. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results: recompute on the host, then compare the logical pair
+ do i = 1, n
+   lvresult = lvresult .or. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.eqv.' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(.eqv.:lresult)
+ do i = 1, n
+   lresult = lresult .eqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results: recompute on the host, then compare the logical pair
+ do i = 1, n
+   lvresult = lvresult .eqv. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+
+ lresult = .false.
+ lvresult = .false.
+
+ ! '.neqv.' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(.neqv.:lresult)
+ do i = 1, n
+   lresult = lresult .neqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ ! Verify the results: recompute on the host, then compare the logical pair
+ do i = 1, n
+   lvresult = lvresult .neqv. (array(i) .ge. 5)
+ end do
+
+ if (lresult .neqv. lvresult) call abort
+end program reduction_3
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90
new file mode 100644
index 0000000..12f7a33
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90
@@ -0,0 +1,54 @@
+! { dg-do run }
+
+! complex reductions
+
+program reduction_4
+ implicit none
+ ! Sum a complex array with a '+' reduction clause and compare with a host loop.
+ integer, parameter :: n = 10, vl = 32
+ integer :: i
+ complex :: vresult, result
+ complex, dimension (n) :: array
+
+ do i = 1, n
+ array(i) = i
+ end do
+
+ result = 0
+ vresult = 0
+
+ ! '+' reductions
+
+ !$acc parallel vector_length(vl) num_gangs(1)
+ !$acc loop reduction(+:result)
+ do i = 1, n
+ result = result + array(i)
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = vresult + array(i)
+ end do
+
+ if (result .ne. vresult) call abort
+
+ result = 1
+ vresult = 1
+ ! NOTE(review): the '*' case below is commented out, so the two assignments above are never read.
+! ! '*' reductions
+!
+! !$acc parallel vector_length(vl)
+! !$acc loop reduction(*:result)
+! do i = 1, n
+! result = result * array(i)
+! end do
+! !$acc end parallel
+!
+! ! Verify the results
+! do i = 1, n
+! vresult = vresult * array(i)
+! end do
+!
+! if (result.ne.vresult) call abort
+end program reduction_4
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90
new file mode 100644
index 0000000..df44a7a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90
@@ -0,0 +1,32 @@
+! { dg-do run }
+
+! subroutine reduction
+
+program reduction
+ integer, parameter :: n = 40, c = 10
+ integer :: i, vsum, sum
+
+ ! REDSUB zeroes SUM and adds C to it N times under a reduction clause.
+ call redsub (sum, n, c)
+
+ ! Host reference: the same accumulation in a plain loop.
+ vsum = 0
+ do i = 1, n
+   vsum = vsum + c
+ end do
+
+ if (sum /= vsum) call abort ()
+end program reduction
+
+subroutine redsub(sum, n, c)
+ integer :: sum, n, c
+ integer :: i
+ ! SUM is cleared, then receives N additions of C inside the parallel region.
+ sum = 0
+ !$acc parallel vector_length(n) copyin (n, c) num_gangs(1)
+ !$acc loop reduction(+:sum)
+ do i = 1, n
+   sum = sum + c
+ end do
+ !$acc end parallel
+end subroutine redsub
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90
new file mode 100644
index 0000000..6325431
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90
@@ -0,0 +1,30 @@
+! { dg-do run }
+
+program reduction
+ implicit none
+
+ integer, parameter :: n = 100
+ integer :: i, s1, s2, vs1, vs2
+
+ ! Two sums share one reduction clause; each has a host-side reference.
+ s1 = 0
+ vs1 = 0
+ s2 = 0
+ vs2 = 0
+
+ !$acc parallel vector_length (1000)
+ !$acc loop reduction(+:s1, s2)
+ do i = 1, n
+   s1 = s1 + 1
+   s2 = s2 + 2
+ end do
+ !$acc end parallel
+
+ ! Host references, accumulated in a plain loop.
+ do i = 1, n
+   vs1 = vs1 + 1
+   vs2 = vs2 + 2
+ end do
+
+ if (s1 /= vs1 .or. s2 /= vs2) call abort ()
+end program reduction
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-1.f90
new file mode 100644
index 0000000..3390515
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-1.f90
@@ -0,0 +1,32 @@
+! { dg-do run }
+! { dg-options "-fno-inline" }
+
+ interface
+   recursive function fact (x)
+     !$acc routine
+     integer, intent(in) :: x
+     integer :: fact
+   end function fact
+ end interface
+ integer, parameter :: n = 10
+ integer :: a(n), i
+ !$acc parallel
+ !$acc loop
+ do i = 1, n
+   a(i) = fact (i)                     ! called inside the parallel region
+ end do
+ !$acc end parallel
+ do i = 1, n
+   if (a(i) /= fact(i)) call abort     ! host call supplies the reference
+ end do
+end
+recursive function fact (x) result (res)
+ !$acc routine
+ integer, intent(in) :: x
+ integer :: res
+ if (x >= 1) then
+   res = x * fact (x - 1)   ! recursive step
+ else
+   res = 1                  ! every X below 1 yields 1
+ end if
+end function fact
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-2.f90
new file mode 100644
index 0000000..3d418b6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-2.f90
@@ -0,0 +1,29 @@
+! { dg-do run }
+! { dg-options "-fno-inline" }
+
+ module m1
+ contains
+   recursive function fact (x) result (res)
+     !$acc routine
+     integer, intent(in) :: x
+     integer :: res
+     if (x >= 1) then
+       res = x * fact (x - 1)   ! recursive step
+     else
+       res = 1                  ! every X below 1 yields 1
+     end if
+   end function fact
+ end module m1
+ use m1
+ integer, parameter :: n = 10
+ integer :: a(n), i
+ !$acc parallel
+ !$acc loop
+ do i = 1, n
+   a(i) = fact (i)                     ! called inside the parallel region
+ end do
+ !$acc end parallel
+ do i = 1, n
+   if (a(i) /= fact(i)) call abort     ! host call supplies the reference
+ end do
+end
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-3.f90
new file mode 100644
index 0000000..d233a63
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-3.f90
@@ -0,0 +1,27 @@
+! { dg-do run }
+! { dg-options "-fno-inline" }
+
+ integer, parameter :: n = 10
+ integer :: a(n), i
+ integer, external :: fact
+ !$acc routine (fact)
+ !$acc parallel
+ !$acc loop
+ do i = 1, n
+   a(i) = fact (i)                     ! called inside the parallel region
+ end do
+ !$acc end parallel
+ do i = 1, n
+   if (a(i) /= fact(i)) call abort     ! host call supplies the reference
+ end do
+end
+recursive function fact (x) result (res)
+ !$acc routine
+ integer, intent(in) :: x
+ integer :: res
+ if (x >= 1) then
+   res = x * fact (x - 1)   ! recursive step
+ else
+   res = 1                  ! every X below 1 yields 1
+ end if
+end function fact
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-4.f90
new file mode 100644
index 0000000..3e5fb09
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-4.f90
@@ -0,0 +1,23 @@
+! { dg-do run }
+! { dg-options "-fno-inline" }
+
+ integer, parameter :: n = 10
+ integer :: a(n), i
+ do i = 1, n
+   a(i) = i                            ! start value: a(i) = i
+ end do
+ !$acc parallel
+ !$acc loop
+ do i = 1, n
+   call incr(a(i))                     ! called inside the parallel region
+ end do
+ !$acc end parallel
+ do i = 1, n
+   if (a(i) /= (i + 1)) call abort     ! each element must have grown by one
+ end do
+end
+subroutine incr (x)
+ !$acc routine
+ integer, intent(inout) :: x
+ x = 1 + x   ! add one to X in place
+end subroutine incr
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90
new file mode 100644
index 0000000..b39414f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90
@@ -0,0 +1,97 @@
+program subarrays
+ implicit none
+ integer, parameter :: n = 20
+ integer :: i, a(n), b(n)
+ ! A is the array under test; B holds the expected values once it is filled below.
+ a(:) = 0
+ b(:) = 0
+
+ ! COPY: A is written in the region and must come back with a(i) = i.
+
+ !$acc parallel copy (a(1:n))
+ !$acc loop
+ do i = 1, n
+   a(i) = i
+ end do
+ !$acc end parallel
+
+ do i = 1, n
+   b(i) = i
+ end do
+
+ call check (a, b, n)
+
+ ! COPYOUT: A is cleared on the host, so the values must come from the region.
+
+ a(:) = 0
+
+ !$acc parallel copyout (a(1:n))
+ !$acc loop
+ do i = 1, n
+   a(i) = i
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! COPYIN: the region reads B, so A only matches if B really was copied in.
+
+ a(:) = 0
+
+ !$acc parallel copyout (a(1:n)) copyin (b(1:n))
+ !$acc loop
+ do i = 1, n
+   a(i) = b(i)
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPY: A is cleared first so that stale values cannot satisfy the check.
+
+ a(:) = 0
+
+ !$acc parallel pcopy (a(1:n))
+ !$acc loop
+ do i = 1, n
+   a(i) = i
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPYOUT
+
+ a(:) = 0
+
+ !$acc parallel pcopyout (a(1:n))
+ !$acc loop
+ do i = 1, n
+   a(i) = i
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPYIN: as for COPYIN, the region reads B.
+
+ a(:) = 0
+
+ !$acc parallel pcopyout (a(1:n)) pcopyin (b(1:n))
+ !$acc loop
+ do i = 1, n
+   a(i) = b(i)
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+end program subarrays
+
+subroutine check (a, b, n)
+ ! Abort unless the N elements of A and B agree pairwise.
+ integer :: n, a(n), b(n)
+
+ ! A single mismatch anywhere is fatal.
+ if (any (a /= b)) call abort
+
+end subroutine check
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90
new file mode 100644
index 0000000..81799f6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90
@@ -0,0 +1,100 @@
+program subarrays
+ integer, parameter :: n = 20, c = 10, low = 5, high = 10
+ integer :: i, a(n), b(n)
+
+ a = 0
+ b = 0
+
+ ! COPY: only the section a(low:high) is named in the clause and written.
+
+ !$acc parallel copy (a(low:high))
+ !$acc loop
+ do i = low, high
+   a(i) = i
+ end do
+ !$acc end parallel
+
+ ! B is the reference for every check: b(i) = i inside low..high and,
+ ! from the initialisation above, zero everywhere else.
+ b(low:high) = (/ (i, i = low, high) /)
+
+ call check (a, b, n)
+
+ ! COPYOUT
+
+ a = 0
+
+ !$acc parallel copyout (a(low:high))
+ !$acc loop
+ do i = low, high
+   a(i) = i
+ end do
+ !$acc end parallel
+
+ ! The section is compared on its own first, then both arrays are
+ ! compared over their full extent.
+ if (any (a(low:high) /= b(low:high))) call abort
+ call check (a, b, n)
+
+ ! COPYIN: the region reads B.
+
+ a = 0
+
+ !$acc parallel copyout (a(low:high)) copyin (b(low:high))
+ !$acc loop
+ do i = low, high
+   a(i) = b(i)
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPY
+
+ a = 0
+
+ !$acc parallel pcopy (a(low:high))
+ !$acc loop
+ do i = low, high
+   a(i) = i
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPYOUT
+
+ a = 0
+
+ !$acc parallel pcopyout (a(low:high))
+ !$acc loop
+ do i = low, high
+   a(i) = i
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+
+ ! PRESENT_OR_COPYIN: the region reads B.
+
+ a = 0
+
+ !$acc parallel pcopyout (a(low:high)) &
+ !$acc & pcopyin (b(low:high))
+ !$acc loop
+ do i = low, high
+   a(i) = b(i)
+ end do
+ !$acc end parallel
+
+ call check (a, b, n)
+end program subarrays
+
+subroutine check (a, b, n)
+ ! Abort unless the N elements of A and B agree pairwise.
+ integer :: n, a(n), b(n)
+
+ ! A single mismatch anywhere is fatal.
+ if (any (a /= b)) call abort
+
+end subroutine check