diff options
Diffstat (limited to 'gcc/omp-low.cc')
-rw-r--r-- | gcc/omp-low.cc | 1853 |
1 files changed, 1731 insertions, 122 deletions
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc index e1036ad..2141b4a 100644 --- a/gcc/omp-low.cc +++ b/gcc/omp-low.cc @@ -106,6 +106,11 @@ struct omp_context construct. In the case of a parallel, this is in the child function. */ tree block_vars; + /* A hash map to track variables added through omp_copy_decl_*, to ensure + repeated calls of install_var_local on the same DECL do not get duplicated + local versions. */ + hash_map<tree, tree> *block_vars_map; + /* Label to which GOMP_cancel{,llation_point} and explicit and implicit barriers should jump to during omplower pass. */ tree cancel_label; @@ -181,6 +186,10 @@ struct omp_context than teams is strictly nested in it. */ bool nonteams_nested_p; + /* Indicates that context is in OMPACC mode, set after _ompacc_ internal + clauses are removed. */ + bool ompacc_p; + /* Candidates for adjusting OpenACC privatization level. */ vec<tree> oacc_privatization_candidates; }; @@ -592,12 +601,26 @@ use_pointer_for_field (tree decl, omp_context *shared_ctx) static tree omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx) { + if (ctx) + { + if (!ctx->block_vars_map) + ctx->block_vars_map = new hash_map<tree, tree> (); + else + { + tree *tp = ctx->block_vars_map->get (var); + if (tp) + return *tp; + } + } + tree copy = copy_var_decl (var, name, type); DECL_CONTEXT (copy) = current_function_decl; if (ctx) { + ctx->block_vars_map->put (var, copy); + DECL_CHAIN (copy) = ctx->block_vars; ctx->block_vars = copy; } @@ -780,29 +803,33 @@ build_sender_ref (tree var, omp_context *ctx) return build_sender_ref ((splay_tree_key) var, ctx); } -/* Add a new field for VAR inside the structure CTX->SENDER_DECL. If - BASE_POINTERS_RESTRICT, declare the field with restrict. 
*/ - static void -install_var_field (tree var, bool by_ref, int mask, omp_context *ctx) +install_var_field (tree var, bool by_ref, int mask, omp_context *ctx, + tree key_expr = NULL_TREE, bool field_may_exist = false) { tree field, type, sfield = NULL_TREE; splay_tree_key key = (splay_tree_key) var; - if ((mask & 16) != 0) - { - key = (splay_tree_key) &DECL_NAME (var); - gcc_checking_assert (key != (splay_tree_key) var); - } - if ((mask & 8) != 0) + if (key_expr) + /* Allow user to explicitly set the expression used as the key. */ + key = (splay_tree_key) key_expr; + else { - key = (splay_tree_key) &DECL_UID (var); - gcc_checking_assert (key != (splay_tree_key) var); + if ((mask & 16) != 0) + { + key = (splay_tree_key) &DECL_NAME (var); + gcc_checking_assert (key != (splay_tree_key) var); + } + if ((mask & 8) != 0) + { + key = (splay_tree_key) &DECL_UID (var); + gcc_checking_assert (key != (splay_tree_key) var); + } } gcc_assert ((mask & 1) == 0 - || !splay_tree_lookup (ctx->field_map, key)); + || !splay_tree_lookup (ctx->field_map, key) || field_may_exist); gcc_assert ((mask & 2) == 0 || !ctx->sfield_map - || !splay_tree_lookup (ctx->sfield_map, key)); + || !splay_tree_lookup (ctx->sfield_map, key) || field_may_exist); gcc_assert ((mask & 3) == 3 || !is_gimple_omp_oacc (ctx->stmt)); @@ -968,6 +995,123 @@ omp_copy_decl (tree var, copy_body_data *cb) return error_mark_node; } +/* Helper function for create_noncontig_array_descr_type(), to append a new field + to a record type. */ + +static void +append_field_to_record_type (tree record_type, tree fld_ident, tree fld_type) +{ + tree *p, fld = build_decl (UNKNOWN_LOCATION, FIELD_DECL, fld_ident, fld_type); + DECL_CONTEXT (fld) = record_type; + + for (p = &TYPE_FIELDS (record_type); *p; p = &DECL_CHAIN (*p)) + ; + *p = fld; +} + +/* Create type for non-contiguous array descriptor. Returns created type, and + returns the number of dimensions in *DIM_NUM. 
*/ + +static tree +create_noncontig_array_descr_type (tree dims, int *dim_num) +{ + int n = 0; + tree array_descr_type, name, x; + gcc_assert (TREE_CODE (dims) == TREE_LIST); + + array_descr_type = lang_hooks.types.make_type (RECORD_TYPE); + name = create_tmp_var_name (".omp_noncontig_array_descr_type"); + name = build_decl (UNKNOWN_LOCATION, TYPE_DECL, name, array_descr_type); + DECL_ARTIFICIAL (name) = 1; + DECL_NAMELESS (name) = 1; + TYPE_NAME (array_descr_type) = name; + TYPE_ARTIFICIAL (array_descr_type) = 1; + + /* Number of dimensions. */ + append_field_to_record_type (array_descr_type, get_identifier ("__dim_num"), + sizetype); + + for (x = dims; x; x = TREE_CHAIN (x), n++) + { + char *fldname; + /* One for the start index. */ + ASM_FORMAT_PRIVATE_NAME (fldname, "__dim_base", n); + append_field_to_record_type (array_descr_type, get_identifier (fldname), + sizetype); + /* One for the length. */ + ASM_FORMAT_PRIVATE_NAME (fldname, "__dim_length", n); + append_field_to_record_type (array_descr_type, get_identifier (fldname), + sizetype); + /* One for the element size. */ + ASM_FORMAT_PRIVATE_NAME (fldname, "__dim_elem_size", n); + append_field_to_record_type (array_descr_type, get_identifier (fldname), + sizetype); + /* One for is_array flag. */ + ASM_FORMAT_PRIVATE_NAME (fldname, "__dim_is_array", n); + append_field_to_record_type (array_descr_type, get_identifier (fldname), + sizetype); + } + + layout_type (array_descr_type); + *dim_num = n; + return array_descr_type; +} + +/* Generate code sequence for initializing non-contiguous array descriptor. 
*/ + +static void +create_noncontig_array_descr_init_code (tree array_descr, tree array_var, + tree dimensions, int dim_num, + gimple_seq *ilist) +{ + tree fld, fldref; + tree array_descr_type = TREE_TYPE (array_descr); + tree dim_type = TREE_TYPE (array_var); + + if (TREE_CODE (dim_type) == REFERENCE_TYPE) + dim_type = TREE_TYPE (dim_type); + + fld = TYPE_FIELDS (array_descr_type); + fldref = omp_build_component_ref (array_descr, fld); + gimplify_assign (fldref, build_int_cst (sizetype, dim_num), ilist); + + while (dimensions) + { + tree dim_base = fold_convert (sizetype, TREE_PURPOSE (dimensions)); + tree dim_length = fold_convert (sizetype, TREE_VALUE (dimensions)); + tree dim_elem_size = TYPE_SIZE_UNIT (TREE_TYPE (dim_type)); + tree dim_is_array = (TREE_CODE (dim_type) == ARRAY_TYPE + ? integer_one_node : integer_zero_node); + /* Set base. */ + fld = TREE_CHAIN (fld); + fldref = omp_build_component_ref (array_descr, fld); + dim_base = fold_build2 (MULT_EXPR, sizetype, dim_base, dim_elem_size); + gimplify_assign (fldref, dim_base, ilist); + + /* Set length. */ + fld = TREE_CHAIN (fld); + fldref = omp_build_component_ref (array_descr, fld); + dim_length = fold_build2 (MULT_EXPR, sizetype, dim_length, dim_elem_size); + gimplify_assign (fldref, dim_length, ilist); + + /* Set elem_size. */ + fld = TREE_CHAIN (fld); + fldref = omp_build_component_ref (array_descr, fld); + dim_elem_size = fold_convert (sizetype, dim_elem_size); + gimplify_assign (fldref, dim_elem_size, ilist); + + /* Set is_array flag. */ + fld = TREE_CHAIN (fld); + fldref = omp_build_component_ref (array_descr, fld); + dim_is_array = fold_convert (sizetype, dim_is_array); + gimplify_assign (fldref, dim_is_array, ilist); + + dimensions = TREE_CHAIN (dimensions); + dim_type = TREE_TYPE (dim_type); + } + gcc_assert (TREE_CHAIN (fld) == NULL_TREE); +} + /* Create a new context, with OUTER_CTX being the surrounding context. 
*/ static omp_context * @@ -1081,6 +1225,7 @@ delete_omp_context (splay_tree_value value) delete ctx->task_reduction_map; } + delete ctx->block_vars_map; delete ctx->lastprivate_conditional_map; delete ctx->allocate_map; @@ -1147,6 +1292,60 @@ fixup_child_record_type (omp_context *ctx) : build_reference_type (type), TYPE_QUAL_RESTRICT); } +/* Build record type for noncontiguous target update operations. Must be kept + in sync with libgomp/libgomp.h omp_noncontig_array_desc. */ + +static tree +omp_noncontig_descriptor_type (location_t loc) +{ + static tree cached = NULL_TREE; + + if (cached) + return cached; + + tree t = make_node (RECORD_TYPE); + + tree fields = build_decl (loc, FIELD_DECL, get_identifier ("__ndims"), + size_type_node); + + tree field = build_decl (loc, FIELD_DECL, get_identifier ("__elemsize"), + size_type_node); + TREE_CHAIN (field) = fields; + fields = field; + + field = build_decl (loc, FIELD_DECL, get_identifier ("__span"), + size_type_node); + TREE_CHAIN (field) = fields; + fields = field; + + tree ptr_size_type = build_pointer_type (size_type_node); + + field = build_decl (loc, FIELD_DECL, get_identifier ("__dim"), ptr_size_type); + TREE_CHAIN (field) = fields; + fields = field; + + field = build_decl (loc, FIELD_DECL, get_identifier ("__index"), + ptr_size_type); + TREE_CHAIN (field) = fields; + fields = field; + + field = build_decl (loc, FIELD_DECL, get_identifier ("__length"), + ptr_size_type); + TREE_CHAIN (field) = fields; + fields = field; + + field = build_decl (loc, FIELD_DECL, get_identifier ("__stride"), + ptr_size_type); + TREE_CHAIN (field) = fields; + fields = field; + + finish_builtin_struct (t, "__omp_noncontig_desc_type", fields, ptr_type_node); + + cached = t; + + return t; +} + /* Instantiate decls as necessary in CTX to satisfy the data sharing specified by CLAUSES. 
*/ @@ -1173,6 +1372,36 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) && omp_maybe_offloaded_ctx (ctx)) error_at (OMP_CLAUSE_LOCATION (c), "%<allocate%> clause must" " specify an allocator here"); + if ((omp_requires_mask & OMP_REQUIRES_DYNAMIC_ALLOCATORS) == 0 + && OMP_CLAUSE_ALLOCATE_ALLOCATOR (c) != NULL_TREE + && DECL_P (OMP_CLAUSE_ALLOCATE_ALLOCATOR (c)) + && !DECL_ARTIFICIAL (OMP_CLAUSE_ALLOCATE_ALLOCATOR (c))) + { + tree alloc2 = OMP_CLAUSE_ALLOCATE_ALLOCATOR (c); + if (TREE_CODE (alloc2) == MEM_REF + || TREE_CODE (alloc2) == INDIRECT_REF) + alloc2 = TREE_OPERAND (alloc2, 0); + omp_context *ctx2 = ctx; + for (; ctx2; ctx2 = ctx2->outer) + if (is_gimple_omp_offloaded (ctx2->stmt)) + break; + if (ctx2 != NULL) + { + tree c2 = gimple_omp_target_clauses (ctx2->stmt); + for (; c2; c2 = OMP_CLAUSE_CHAIN (c2)) + if (OMP_CLAUSE_CODE (c2) == OMP_CLAUSE_USES_ALLOCATORS + && operand_equal_p ( + alloc2, OMP_CLAUSE_USES_ALLOCATORS_ALLOCATOR (c2))) + break; + if (c2 == NULL_TREE) + error_at (EXPR_LOC_OR_LOC (OMP_CLAUSE_ALLOCATE_ALLOCATOR (c), + OMP_CLAUSE_LOCATION (c)), + "allocator %qE in %<allocate%> clause inside a " + "target region must be specified in an " + "%<uses_allocators%> clause on the %<target%> " + "directive", alloc2); + } + } if (ctx->allocate_map == NULL) ctx->allocate_map = new hash_map<tree, tree>; tree val = integer_zero_node; @@ -1385,8 +1614,13 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) || (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_HAS_DEVICE_ADDR && lang_hooks.decls.omp_array_data (decl, true))) { + /* OpenACC firstprivate clauses are later processed with same + code path as map clauses in lower_omp_target, so follow + the same convention of using the whole clause expression + as splay-tree key. */ + tree k = (is_oacc_parallel_or_serial (ctx) ? 
c : NULL_TREE); by_ref = !omp_privatize_by_reference (decl); - install_var_field (decl, by_ref, 3, ctx); + install_var_field (decl, by_ref, 3, ctx, k); } else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_HAS_DEVICE_ADDR) { @@ -1591,7 +1825,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)) && varpool_node::get_create (decl)->offloadable && !lookup_attribute ("omp declare target link", - DECL_ATTRIBUTES (decl))) + DECL_ATTRIBUTES (decl)) + && !is_gimple_omp_oacc (ctx->stmt)) break; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER) @@ -1670,7 +1905,104 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) install_var_local (decl, ctx); break; } - if (DECL_P (decl)) + + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && GOMP_MAP_NONCONTIG_ARRAY_P (OMP_CLAUSE_MAP_KIND (c))) + { + tree array_decl = OMP_CLAUSE_DECL (c); + tree array_type = TREE_TYPE (array_decl); + bool by_ref = (TREE_CODE (array_type) == ARRAY_TYPE + ? true : false); + + /* Checking code to ensure we only have arrays at top dimension. + This limitation might be lifted in the future. See PR76639. 
*/ + if (TREE_CODE (array_type) == REFERENCE_TYPE) + array_type = TREE_TYPE (array_type); + tree t = array_type, prev_t = NULL_TREE; + while (t) + { + if (TREE_CODE (t) == ARRAY_TYPE && prev_t) + { + error_at (gimple_location (ctx->stmt), "array types are" + " only allowed at outermost dimension of" + " non-contiguous array"); + break; + } + prev_t = t; + t = TREE_TYPE (t); + } + + install_var_field (array_decl, by_ref, 3, ctx, c); + install_var_local (array_decl, ctx); + break; + } + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_TO_GRID + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FROM_GRID)) + { + tree desc_type = omp_noncontig_descriptor_type (UNKNOWN_LOCATION); + + tree bare = decl; + if (TREE_CODE (bare) == VIEW_CONVERT_EXPR) + bare = TREE_OPERAND (bare, 0); + + const char *desc_name = ".omp_noncontig_desc"; + /* Try (but not too hard) to make a friendly name for the + descriptor. */ + if (DECL_P (bare)) + desc_name = ACONCAT ((".omp_nc_desc_", + IDENTIFIER_POINTER (DECL_NAME (bare)), + NULL)); + tree desc = create_tmp_var (desc_type, desc_name); + DECL_NAMELESS (desc) = 1; + TREE_ADDRESSABLE (desc) = 1; + + /* Adjust DECL so it refers to the first element of the array: + either by indirecting a pointer, or by selecting the zero'th + index of each dimension of an array. (We don't have a "bias" + as such for this type of noncontiguous update operation, just + the volume specified in the descriptor we build in + lower_omp_target.) 
*/ + + if (TREE_CODE (TREE_TYPE (decl)) == POINTER_TYPE) + { + decl = build_fold_indirect_ref (decl); + OMP_CLAUSE_DECL (c) = decl; + } + + tree field + = build_decl (OMP_CLAUSE_LOCATION (c), FIELD_DECL, NULL_TREE, + ptr_type_node); + SET_DECL_ALIGN (field, TYPE_ALIGN (ptr_type_node)); + insert_field_into_struct (ctx->record_type, field); + splay_tree_insert (ctx->field_map, (splay_tree_key) c, + (splay_tree_value) field); + + tree dn = build_omp_clause (OMP_CLAUSE_LOCATION (c), + OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (dn, GOMP_MAP_TO_PSET); + OMP_CLAUSE_DECL (dn) = desc; + + OMP_CLAUSE_CHAIN (dn) = OMP_CLAUSE_CHAIN (c); + OMP_CLAUSE_CHAIN (c) = dn; + + field = build_decl (OMP_CLAUSE_LOCATION (c), FIELD_DECL, + NULL_TREE, ptr_type_node); + SET_DECL_ALIGN (field, TYPE_ALIGN (ptr_type_node)); + insert_field_into_struct (ctx->record_type, field); + splay_tree_insert (ctx->field_map, (splay_tree_key) dn, + (splay_tree_value) field); + + c = dn; + tree nc; + + while ((nc = OMP_CLAUSE_CHAIN (c)) + && OMP_CLAUSE_CODE (nc) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_GRID_DIM + || OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_GRID_STRIDE)) + c = nc; + } + else if (DECL_P (decl)) { if (DECL_SIZE (decl) && !poly_int_tree_p (DECL_SIZE (decl))) @@ -1679,7 +2011,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) gcc_assert (INDIRECT_REF_P (decl2)); decl2 = TREE_OPERAND (decl2, 0); gcc_assert (DECL_P (decl2)); - install_var_field (decl2, true, 3, ctx); + install_var_field (decl2, true, 3, ctx, c); install_var_local (decl2, ctx); install_var_local (decl, ctx); } @@ -1689,9 +2021,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE) - install_var_field (decl, true, 7, ctx); + install_var_field (decl, true, 7, ctx, c, true); else - install_var_field (decl, true, 3, ctx); + install_var_field (decl, true, 3, ctx, c, 
true); if (is_gimple_omp_offloaded (ctx->stmt) && !(is_gimple_omp_oacc (ctx->stmt) && OMP_CLAUSE_MAP_IN_REDUCTION (c))) @@ -1721,13 +2053,26 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) } gcc_assert (!splay_tree_lookup (ctx->field_map, (splay_tree_key) decl)); + tree ptr_type = ptr_type_node; + if (TREE_CODE (decl) == ARRAY_REF) + { + tree array_type = TREE_TYPE (TREE_OPERAND (decl, 0)); + ptr_type = build_pointer_type (array_type); + } tree field = build_decl (OMP_CLAUSE_LOCATION (c), - FIELD_DECL, NULL_TREE, ptr_type_node); - SET_DECL_ALIGN (field, TYPE_ALIGN (ptr_type_node)); + FIELD_DECL, NULL_TREE, ptr_type); + SET_DECL_ALIGN (field, TYPE_ALIGN (ptr_type)); insert_field_into_struct (ctx->record_type, field); - splay_tree_insert (ctx->field_map, (splay_tree_key) decl, + splay_tree_insert (ctx->field_map, (splay_tree_key) /*decl,xxx*/ c, (splay_tree_value) field); + + if (TREE_CODE (decl) == ARRAY_REF + && is_gimple_omp_offloaded (ctx->stmt) + && !splay_tree_lookup (ctx->field_map, + (splay_tree_key) base)) + splay_tree_insert (ctx->field_map, (splay_tree_key) base, + (splay_tree_value) field); } } break; @@ -1768,6 +2113,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_FINALIZE: case OMP_CLAUSE_TASK_REDUCTION: case OMP_CLAUSE_ALLOCATE: + case OMP_CLAUSE_USES_ALLOCATORS: + case OMP_CLAUSE__OMPACC_: break; case OMP_CLAUSE_ALIGNED: @@ -1908,6 +2255,11 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) && is_omp_target (ctx->stmt) && !is_gimple_omp_offloaded (ctx->stmt)) break; + if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_TO_GRID + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FROM_GRID + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_GRID_DIM + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_GRID_STRIDE) + break; if (DECL_P (decl)) { if ((OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER @@ -1994,6 +2346,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_INIT: case OMP_CLAUSE_USE: case OMP_CLAUSE_DESTROY: + case 
OMP_CLAUSE_USES_ALLOCATORS: + case OMP_CLAUSE__OMPACC_: break; case OMP_CLAUSE__CACHE_: @@ -2063,6 +2417,21 @@ omp_maybe_offloaded_ctx (omp_context *ctx) return false; } +static bool +ompacc_ctx_p (omp_context *ctx) +{ + if (cgraph_node::get (current_function_decl)->offloadable + && lookup_attribute ("ompacc", + DECL_ATTRIBUTES (current_function_decl))) + return true; + for (; ctx; ctx = ctx->outer) + if (is_gimple_omp_offloaded (ctx->stmt)) + return (ctx->ompacc_p + || omp_find_clause (gimple_omp_target_clauses (ctx->stmt), + OMP_CLAUSE__OMPACC_)); + return false; +} + /* Build a decl for the omp child function. It'll not contain a body yet, just the bare decl. */ @@ -2368,8 +2737,28 @@ scan_omp_parallel (gimple_stmt_iterator *gsi, omp_context *outer_ctx) DECL_NAMELESS (name) = 1; TYPE_NAME (ctx->record_type) = name; TYPE_ARTIFICIAL (ctx->record_type) = 1; - create_omp_child_function (ctx, false); - gimple_omp_parallel_set_child_fn (stmt, ctx->cb.dst_fn); + + if (flag_openmp_target == OMP_TARGET_MODE_OMPACC + && ompacc_ctx_p (ctx)) + { + tree data_name = get_identifier (".omp_data_i_par"); + tree t = build_decl (gimple_location (stmt), VAR_DECL, data_name, + ptr_type_node); + DECL_ARTIFICIAL (t) = 1; + DECL_NAMELESS (t) = 1; + DECL_CONTEXT (t) = current_function_decl; + DECL_SEEN_IN_BIND_EXPR_P (t) = 1; + DECL_CHAIN (t) = ctx->block_vars; + ctx->block_vars = t; + TREE_USED (t) = 1; + TREE_READONLY (t) = 1; + ctx->receiver_decl = t; + } + else + { + create_omp_child_function (ctx, false); + gimple_omp_parallel_set_child_fn (stmt, ctx->cb.dst_fn); + } scan_sharing_clauses (gimple_omp_parallel_clauses (stmt), ctx); scan_omp (gimple_omp_body_ptr (stmt), ctx); @@ -3119,6 +3508,50 @@ scan_omp_single (gomp_single *stmt, omp_context *outer_ctx) layout_type (ctx->record_type); } +/* Reorder clauses so that non-contiguous array map clauses are placed at the very + front of the chain. 
*/ + +static void +reorder_noncontig_array_clauses (tree *clauses_ptr) +{ + tree c, clauses = *clauses_ptr; + tree prev_clause = NULL_TREE, next_clause; + tree array_clauses = NULL_TREE, array_clauses_tail = NULL_TREE; + + for (c = clauses; c; c = next_clause) + { + next_clause = OMP_CLAUSE_CHAIN (c); + + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && GOMP_MAP_NONCONTIG_ARRAY_P (OMP_CLAUSE_MAP_KIND (c))) + { + /* Unchain c from clauses. */ + if (c == clauses) + clauses = next_clause; + + /* Link on to array_clauses. */ + if (array_clauses_tail) + OMP_CLAUSE_CHAIN (array_clauses_tail) = c; + else + array_clauses = c; + array_clauses_tail = c; + + if (prev_clause) + OMP_CLAUSE_CHAIN (prev_clause) = next_clause; + continue; + } + + prev_clause = c; + } + + /* Place non-contiguous array clauses at the start of the clause list. */ + if (array_clauses) + { + OMP_CLAUSE_CHAIN (array_clauses_tail) = clauses; + *clauses_ptr = array_clauses; + } +} + /* Scan a GIMPLE_OMP_TARGET. */ static void @@ -3127,7 +3560,6 @@ scan_omp_target (gomp_target *stmt, omp_context *outer_ctx) omp_context *ctx; tree name; bool offloaded = is_gimple_omp_offloaded (stmt); - tree clauses = gimple_omp_target_clauses (stmt); ctx = new_omp_context (stmt, outer_ctx); ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0); @@ -3140,6 +3572,14 @@ scan_omp_target (gomp_target *stmt, omp_context *outer_ctx) TYPE_NAME (ctx->record_type) = name; TYPE_ARTIFICIAL (ctx->record_type) = 1; + /* If this is an OpenACC construct, put non-contiguous array clauses (if any) + in front of the clause chain. The runtime can then test the first to see + if the additional map processing for them is required. 
*/ + if (is_gimple_omp_oacc (stmt)) + reorder_noncontig_array_clauses (gimple_omp_target_clauses_ptr (stmt)); + + tree clauses = gimple_omp_target_clauses (stmt); + if (offloaded) { create_omp_child_function (ctx, false); @@ -3149,6 +3589,24 @@ scan_omp_target (gomp_target *stmt, omp_context *outer_ctx) scan_sharing_clauses (clauses, ctx); scan_omp (gimple_omp_body_ptr (stmt), ctx); + if (offloaded && flag_openmp_target == OMP_TARGET_MODE_OMPACC) + { + for (tree *cp = gimple_omp_target_clauses_ptr (stmt); *cp; + cp = &OMP_CLAUSE_CHAIN (*cp)) + if (OMP_CLAUSE_CODE (*cp) == OMP_CLAUSE__OMPACC_) + { + DECL_ATTRIBUTES (gimple_omp_target_child_fn (stmt)) + = tree_cons (get_identifier ("ompacc"), NULL_TREE, + DECL_ATTRIBUTES (gimple_omp_target_child_fn (stmt))); + /* Unlink and remove. */ + *cp = OMP_CLAUSE_CHAIN (*cp); + + /* Set to true. */ + ctx->ompacc_p = true; + break; + } + } + if (TYPE_FIELDS (ctx->record_type) == NULL) ctx->record_type = ctx->receiver_decl = NULL; else @@ -4452,12 +4910,63 @@ maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) return t ? t : decl; } +/* Returns true if DECL is present inside a field that encloses CTX. */ + +static bool +maybe_lookup_field_in_outer_ctx (tree decl, omp_context *ctx) +{ + omp_context *up; + + for (up = ctx->outer; up; up = up->outer) + { + for (tree c = gimple_omp_target_clauses (up->stmt); + c != NULL_TREE; c = OMP_CLAUSE_CHAIN (c)) + if ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_TO + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FROM + || (is_oacc_parallel_or_serial (up) + && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)) + && OMP_CLAUSE_DECL (c) == decl) + return true; + if (maybe_lookup_field (decl, up)) + return true; + } + + return false; +} /* Construct the initialization value for reduction operation OP. 
*/ tree omp_reduction_init_op (location_t loc, enum tree_code op, tree type) { + if (TREE_CODE (type) == ARRAY_TYPE) + { + tree max = TYPE_MAX_VALUE (TYPE_DOMAIN (type)); + if (TREE_CONSTANT (max)) + { + vec<constructor_elt, va_gc> *v = NULL; + HOST_WIDE_INT max_val = tree_to_shwi (max); + tree t = omp_reduction_init_op (loc, op, TREE_TYPE (type)); + for (HOST_WIDE_INT i = 0; i <= max_val; i++) + CONSTRUCTOR_APPEND_ELT (v, size_int (i), t); + return build_constructor (type, v); + } + else + gcc_unreachable (); + } + else if (TREE_CODE (type) == RECORD_TYPE) + { + vec<constructor_elt, va_gc> *v = NULL; + for (tree fld = TYPE_FIELDS (type); fld; fld = TREE_CHAIN (fld)) + if (TREE_CODE (fld) == FIELD_DECL) + { + tree t = omp_reduction_init_op (loc, op, TREE_TYPE (fld)); + CONSTRUCTOR_APPEND_ELT (v, fld, t); + } + return build_constructor (type, v); + } + switch (op) { case PLUS_EXPR: @@ -7407,6 +7916,76 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *body_p, gimple_seq_add_seq (stmt_list, post_stmt_list); } +/* Given an array reduction clause, and the surrounding map clause that mapped + the array (section), calculate the actual bias for the reduction inside + the OpenACC region, generally just: reduction_bias - map_bias, but + encapsulate the hairy details. 
*/ + +static tree +oacc_array_reduction_bias (location_t loc, tree reduction_clause, + omp_context *ctx, tree map_clause, + omp_context *outer) +{ + tree bias = TREE_OPERAND (OMP_CLAUSE_DECL (reduction_clause), 1); + tree orig_var = TREE_OPERAND (OMP_CLAUSE_DECL (reduction_clause), 0); + if (TREE_CODE (orig_var) == POINTER_PLUS_EXPR) + { + tree b = TREE_OPERAND (orig_var, 1); + b = maybe_lookup_decl (b, ctx); + if (b == NULL) + { + b = TREE_OPERAND (orig_var, 1); + b = maybe_lookup_decl_in_outer_ctx (b, ctx); + } + if (integer_zerop (bias)) + bias = b; + else + { + bias = fold_convert_loc (loc, TREE_TYPE (b), bias); + bias = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (b), b, bias); + } + orig_var = TREE_OPERAND (orig_var, 0); + } + + if (TREE_CODE (orig_var) == INDIRECT_REF + || TREE_CODE (orig_var) == ADDR_EXPR) + orig_var = TREE_OPERAND (orig_var, 0); + + tree map_decl = OMP_CLAUSE_DECL (map_clause); + tree next = OMP_CLAUSE_CHAIN (map_clause); + + tree orig_bias = integer_zero_node; + if (TREE_CODE (map_decl) == ARRAY_REF) + { + if (next && OMP_CLAUSE_CODE (next) == OMP_CLAUSE_MAP + && OMP_CLAUSE_DECL (next) == orig_var + && (OMP_CLAUSE_MAP_KIND (next) == GOMP_MAP_FIRSTPRIVATE_POINTER + || OMP_CLAUSE_MAP_KIND (next) == GOMP_MAP_POINTER)) + { + orig_bias = OMP_CLAUSE_SIZE (next); + if (DECL_P (orig_bias)) + orig_bias = lookup_decl (orig_bias, outer); + orig_bias = fold_convert_loc (loc, pointer_sized_int_node, + orig_bias); + } + else + { + tree idx = TREE_OPERAND (map_decl, 1); + idx = lookup_decl (idx, outer); + idx = fold_convert_loc (loc, pointer_sized_int_node, idx); + orig_bias = fold_build2_loc (loc, MULT_EXPR, + pointer_sized_int_node, idx, + TYPE_SIZE_UNIT (TREE_TYPE (map_decl))); + } + } + + bias = fold_convert_loc (loc, pointer_sized_int_node, bias); + tree adjusted_bias = fold_build2_loc (loc, MINUS_EXPR, + pointer_sized_int_node, + bias, orig_bias); + return adjusted_bias; +} + /* Lower the OpenACC reductions of CLAUSES for compute axis LEVEL (which 
might be a placeholder). INNER is true if this is an inner axis of a multi-axis loop. FORK and JOIN are (optional) fork and @@ -7445,10 +8024,110 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, gcc_checking_assert (!is_oacc_kernels_decomposed_part (ctx)); tree orig = OMP_CLAUSE_DECL (c); - tree var = maybe_lookup_decl (orig, ctx); + tree orig_clause; + tree array_type = NULL_TREE; + tree array_addr = NULL_TREE, array_max_idx = NULL_TREE; + tree array_bias = NULL_TREE; + tree var; + if (TREE_CODE (orig) == MEM_REF) + { + array_type = TREE_TYPE (orig); + + tree bias = TREE_OPERAND (OMP_CLAUSE_DECL (c), 1); + tree orig_var = TREE_OPERAND (OMP_CLAUSE_DECL (c), 0); + + if (TREE_CODE (orig_var) == POINTER_PLUS_EXPR) + { + tree b = TREE_OPERAND (orig_var, 1); + if (is_omp_target (ctx->stmt)) + b = NULL_TREE; + else + b = maybe_lookup_decl (b, ctx); + if (b == NULL) + { + b = TREE_OPERAND (orig_var, 1); + b = maybe_lookup_decl_in_outer_ctx (b, ctx); + } + if (integer_zerop (bias)) + bias = b; + else + { + bias = fold_convert_loc (loc, + TREE_TYPE (b), bias); + bias = fold_build2_loc (loc, PLUS_EXPR, + TREE_TYPE (b), b, bias); + } + orig_var = TREE_OPERAND (orig_var, 0); + } + if (TREE_CODE (orig_var) == INDIRECT_REF + || TREE_CODE (orig_var) == ADDR_EXPR) + orig_var = TREE_OPERAND (orig_var, 0); + + gcc_assert (DECL_P (orig_var)); + + tree local_orig_var = lookup_decl (orig_var, ctx); + tree priv_addr = local_orig_var; + if (TREE_CODE (TREE_TYPE (priv_addr)) == ARRAY_TYPE) + priv_addr = build_fold_addr_expr (priv_addr); + + tree priv_addr_type = build_pointer_type (array_type); + + /* Peel away MEM_REF to get at base array VAR_DECL. 
*/ + tree addr = TREE_OPERAND (orig, 0); + if (TREE_CODE (addr) == POINTER_PLUS_EXPR) + addr = TREE_OPERAND (addr, 0); + if (TREE_CODE (addr) == ADDR_EXPR) + addr = TREE_OPERAND (addr, 0); + else if (INDIRECT_REF_P (addr)) + addr = TREE_OPERAND (addr, 0); + orig = addr; + + if (omp_privatize_by_reference (orig)) + { + gcc_assert (DECL_HAS_VALUE_EXPR_P (priv_addr) + && (TREE_CODE (DECL_VALUE_EXPR (priv_addr)) + == MEM_REF)); + priv_addr = TREE_OPERAND (DECL_VALUE_EXPR (priv_addr), 0); + } + + tree tmp = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, + fold_convert (ptr_type_node, priv_addr), + fold_convert (sizetype, bias)); + priv_addr = fold_convert (priv_addr_type, tmp); + + tree addr_var = create_tmp_var (priv_addr_type, + ".array_reduction_addr"); + + gimple_seq s = NULL; + gimplify_assign (addr_var, priv_addr, &s); + gimple_seq_add_seq (&before_fork, s); + + var = create_tmp_var (integer_type_node, + ".array_reduction_data_dep"); + gimple_seq_add_stmt (&before_fork, + gimple_build_assign (var, integer_zero_node)); + + array_addr = addr_var; + array_bias = bias; + array_max_idx + = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (OMP_CLAUSE_DECL (c)))); + tree t = maybe_lookup_decl (array_max_idx, ctx); + if (t) + array_max_idx = t; + } + else + { + var = OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c); + if (!var) + var = maybe_lookup_decl (orig, ctx); + if (!var) + var = orig; + } + + tree incoming, outgoing; tree ref_to_res = NULL_TREE; - tree incoming, outgoing, v1, v2, v3; bool is_private = false; + bool is_fpp = false; enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c); if (rcode == MINUS_EXPR) @@ -7459,9 +8138,6 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, rcode = BIT_IOR_EXPR; tree op = build_int_cst (unsigned_type_node, rcode); - if (!var) - var = orig; - incoming = outgoing = var; if (!inner) @@ -7510,19 +8186,92 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, is_private = true; goto do_lookup; } + 
else if (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (cls) + == GOMP_MAP_FIRSTPRIVATE_POINTER) + && orig == OMP_CLAUSE_DECL (cls) + && !array_addr) + { + is_fpp = true; + goto do_lookup; + } } do_lookup: /* This is the outermost construct with this reduction, see if there's a mapping for it. */ + orig_clause = NULL_TREE; + if (gimple_code (outer->stmt) == GIMPLE_OMP_TARGET) + for (tree cls = gimple_omp_target_clauses (outer->stmt); + cls; cls = OMP_CLAUSE_CHAIN (cls)) + if (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_MAP + && orig == OMP_CLAUSE_DECL (cls) + && maybe_lookup_field (cls, outer)) + { + orig_clause = cls; + break; + } if (gimple_code (outer->stmt) == GIMPLE_OMP_TARGET - && maybe_lookup_field (orig, outer) && !is_private) + && !orig_clause + && !is_private + && maybe_lookup_field (orig, outer)) + orig_clause = orig; + if ((orig_clause != NULL_TREE || is_fpp) && !is_private) { - ref_to_res = build_receiver_ref (orig, false, outer); - if (omp_privatize_by_reference (orig)) - ref_to_res = build_simple_mem_ref (ref_to_res); - tree type = TREE_TYPE (var); + + if (is_fpp && !array_addr) + { + tree x = create_tmp_var (type); + gimplify_assign (x, lookup_decl (orig, outer), fork_seq); + ref_to_res = x; + } + else if (orig_clause) + { + ref_to_res = build_receiver_ref (orig_clause, false, outer); + if (omp_privatize_by_reference (orig)) + ref_to_res = build_simple_mem_ref (ref_to_res); + + bool ptr_ptr_array = false; + if (TREE_CODE (TREE_TYPE (orig)) == ARRAY_TYPE + && TREE_CODE (TREE_TYPE (ref_to_res)) == POINTER_TYPE + && (TREE_CODE (TREE_TYPE (TREE_TYPE (ref_to_res))) + == POINTER_TYPE)) + { + ref_to_res = build_simple_mem_ref (ref_to_res); + ptr_ptr_array = true; + } + + if (array_bias) + { + tree map_bias = integer_zero_node; + if (ptr_ptr_array) + map_bias = array_bias; + else + { + tree m = gimple_omp_target_clauses (outer->stmt); + for (; m; m = OMP_CLAUSE_CHAIN (m)) + if (OMP_CLAUSE_CODE (m) == OMP_CLAUSE_MAP) + { + tree md = 
OMP_CLAUSE_DECL (m); + if (orig == md + || (TREE_CODE (md) == ARRAY_REF + && TREE_OPERAND (md, 0) == orig)) + { + map_bias + = oacc_array_reduction_bias (loc, c, ctx, + m, outer); + break; + } + } + } + tree t = fold_convert (ptr_type_node, ref_to_res); + t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t, + fold_convert (sizetype, map_bias)); + ref_to_res = fold_convert (TREE_TYPE (ref_to_res), t); + } + } + if (POINTER_TYPE_P (type)) type = TREE_TYPE (type); @@ -7534,13 +8283,91 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, /* Try to look at enclosing contexts for reduction var, use original if no mapping found. */ tree t = NULL_TREE; - omp_context *c = ctx->outer; - while (c && !t) + omp_context *cp = ctx->outer; + while (cp) + { + t = maybe_lookup_decl (orig, cp); + if (t) + break; + + cp = cp->outer; + } + + if (array_addr) { - t = maybe_lookup_decl (orig, c); - c = c->outer; + if (t) + { + if (TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE) + { + if (!is_private) + { + gcc_assert (DECL_SIZE (t) + && (TREE_CODE (DECL_SIZE (t)) + != INTEGER_CST) + && DECL_HAS_VALUE_EXPR_P (t)); + t = DECL_VALUE_EXPR (t); + } + + t = fold_convert (ptr_type_node, + build_fold_addr_expr (t)); + if (array_bias) + t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t, + fold_convert (sizetype, array_bias)); + ref_to_res + = fold_convert (build_pointer_type + (TREE_TYPE (orig)), t); + } + else if (TREE_CODE (TREE_TYPE (t)) == POINTER_TYPE) + { + if (array_bias) + t = build2 (POINTER_PLUS_EXPR, ptr_type_node, t, + fold_convert (sizetype, array_bias)); + ref_to_res + = fold_convert (build_pointer_type + (array_type), t); + } + else + gcc_unreachable (); + } + else + { + gcc_assert (!cp && (gimple_code (ctx->stmt) + == GIMPLE_OMP_TARGET)); + + tree mem_ref = NULL_TREE; + tree mem_ref_clause = NULL_TREE; + tree m = gimple_omp_target_clauses (ctx->stmt); + tree orig_val = (DECL_HAS_VALUE_EXPR_P (orig) + ? 
DECL_VALUE_EXPR (orig) : orig); + for (; m; m = OMP_CLAUSE_CHAIN (m)) + if (OMP_CLAUSE_CODE (m) == OMP_CLAUSE_MAP) + { + tree md = OMP_CLAUSE_DECL (m); + if (orig_val == md + || (TREE_CODE (md) == MEM_REF + && INDIRECT_REF_P (orig_val) + && (TREE_OPERAND (md, 0) + == TREE_OPERAND (orig_val, 0)))) + { + mem_ref = md; + mem_ref_clause = m; + break; + } + } + gcc_assert (mem_ref); + mem_ref = build_receiver_ref (mem_ref_clause, false, ctx); + + if (array_bias) + mem_ref = build2 (POINTER_PLUS_EXPR, ptr_type_node, + mem_ref, fold_convert (sizetype, + array_bias)); + ref_to_res + = fold_convert (build_pointer_type (TREE_TYPE (orig)), + mem_ref); + } } - incoming = outgoing = (t ? t : orig); + else + incoming = outgoing = (t ? t : orig); } has_outer_reduction:; @@ -7549,37 +8376,16 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, if (!ref_to_res) ref_to_res = integer_zero_node; - if (omp_privatize_by_reference (orig)) - { - tree type = TREE_TYPE (var); - const char *id = IDENTIFIER_POINTER (DECL_NAME (var)); - - if (!inner) - { - tree x = create_tmp_var (TREE_TYPE (type), id); - gimplify_assign (var, build_fold_addr_expr (x), fork_seq); - } - - v1 = create_tmp_var (type, id); - v2 = create_tmp_var (type, id); - v3 = create_tmp_var (type, id); - - gimplify_assign (v1, var, fork_seq); - gimplify_assign (v2, var, fork_seq); - gimplify_assign (v3, var, fork_seq); + if (!array_addr) + array_addr = array_max_idx = integer_zero_node; - var = build_simple_mem_ref (var); - v1 = build_simple_mem_ref (v1); - v2 = build_simple_mem_ref (v2); - v3 = build_simple_mem_ref (v3); + if (omp_privatize_by_reference (outgoing)) + { outgoing = build_simple_mem_ref (outgoing); if (!TREE_CONSTANT (incoming)) incoming = build_simple_mem_ref (incoming); } - else - /* Note that 'var' might be a mem ref. */ - v1 = v2 = v3 = var; /* Determine position in reduction buffer, which may be used by target. 
The parser has ensured that this is not a @@ -7605,29 +8411,33 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, tree setup_call = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, - TREE_TYPE (var), 6, setup_code, + TREE_TYPE (var), 8, setup_code, unshare_expr (ref_to_res), unshare_expr (incoming), - level, op, off); + level, op, off, + array_addr, array_max_idx); tree init_call = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, - TREE_TYPE (var), 6, init_code, + TREE_TYPE (var), 8, init_code, unshare_expr (ref_to_res), - unshare_expr (v1), level, op, off); + unshare_expr (var), level, op, off, + array_addr, array_max_idx); tree fini_call = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, - TREE_TYPE (var), 6, fini_code, + TREE_TYPE (var), 8, fini_code, unshare_expr (ref_to_res), - unshare_expr (v2), level, op, off); + unshare_expr (var), level, op, off, + array_addr, array_max_idx); tree teardown_call = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, - TREE_TYPE (var), 6, teardown_code, - ref_to_res, unshare_expr (v3), - level, op, off); - - gimplify_assign (unshare_expr (v1), setup_call, &before_fork); - gimplify_assign (unshare_expr (v2), init_call, &after_fork); - gimplify_assign (unshare_expr (v3), fini_call, &before_join); + TREE_TYPE (var), 8, teardown_code, + ref_to_res, unshare_expr (var), + level, op, off, + array_addr, array_max_idx); + + gimplify_assign (unshare_expr (var), setup_call, &before_fork); + gimplify_assign (unshare_expr (var), init_call, &after_fork); + gimplify_assign (unshare_expr (var), fini_call, &before_join); gimplify_assign (unshare_expr (outgoing), teardown_call, &after_join); } @@ -8349,11 +9159,16 @@ lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses, gcc_unreachable (); else if (is_oacc_kernels_decomposed_part (tgt)) ; + else if (flag_openmp_target == OMP_TARGET_MODE_OMPACC + && is_omp_target (tgt->stmt)) + ; else gcc_unreachable (); - /* In a parallel region, 
loops are implicitly INDEPENDENT. */ - if (!tgt || is_oacc_parallel_or_serial (tgt)) + /* In a parallel region, loops without auto and seq clauses are + implicitly INDEPENDENT. */ + if ((!tgt || is_oacc_parallel_or_serial (tgt)) + && !(tag & (OLF_SEQ | OLF_AUTO))) tag |= OLF_INDEPENDENT; /* Loops inside OpenACC 'kernels' decomposed parts' regions are expected to @@ -8364,7 +9179,12 @@ lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses, gcc_assert (!(tag & OLF_AUTO)); } - if (tag & OLF_TILE) + if (flag_openmp_target == OMP_TARGET_MODE_OMPACC + && gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL + && tgt + && ompacc_ctx_p (tgt)) + levels = 1; + else if (tag & OLF_TILE) /* Tiling could use all 3 levels. */ levels = 3; else @@ -11653,6 +12473,23 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) push_gimplify_context (); + if (flag_openmp_target == OMP_TARGET_MODE_OMPACC && ompacc_ctx_p (ctx)) + { + enum omp_clause_code code = OMP_CLAUSE_ERROR; + if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR) + code = OMP_CLAUSE_VECTOR; + else if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) + code = OMP_CLAUSE_GANG; + if (code) + { + /* Adjust into OACC loop kind with vector/gang clause. 
*/ + gimple_omp_for_set_kind (stmt, GF_OMP_FOR_KIND_OACC_LOOP); + tree c = build_omp_clause (UNKNOWN_LOCATION, code); + OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (stmt); + gimple_omp_for_set_clauses (stmt, c); + } + } + if (is_gimple_omp_oacc (ctx->stmt)) oacc_privatization_scan_clause_chain (ctx, gimple_omp_for_clauses (stmt)); @@ -11674,7 +12511,9 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) gbind *inner_bind = as_a <gbind *> (gimple_seq_first_stmt (omp_for_body)); tree vars = gimple_bind_vars (inner_bind); - if (is_gimple_omp_oacc (ctx->stmt)) + if (is_gimple_omp_oacc (ctx->stmt) + || (flag_openmp_target == OMP_TARGET_MODE_OMPACC + && ompacc_ctx_p (ctx))) oacc_privatization_scan_decl_chain (ctx, vars); gimple_bind_append_vars (new_stmt, vars); /* bind_vars/BLOCK_VARS are being moved to new_stmt/block, don't @@ -11790,7 +12629,8 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) lower_omp (gimple_omp_body_ptr (stmt), ctx); gcall *private_marker = NULL; - if (is_gimple_omp_oacc (ctx->stmt) + if ((is_gimple_omp_oacc (ctx->stmt) + || (flag_openmp_target == OMP_TARGET_MODE_OMPACC && ompacc_ctx_p (ctx))) && !gimple_seq_empty_p (omp_for_body)) private_marker = lower_oacc_private_marker (ctx); @@ -11845,11 +12685,13 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) /* Once lowered, extract the bounds and clauses. */ omp_extract_for_data (stmt, &fd, NULL); - if (is_gimple_omp_oacc (ctx->stmt) - && !ctx_in_oacc_kernels_region (ctx)) - lower_oacc_head_tail (gimple_location (stmt), - gimple_omp_for_clauses (stmt), private_marker, - &oacc_head, &oacc_tail, ctx); + if (flag_openacc) + { + if (is_gimple_omp_oacc (ctx->stmt) && !ctx_in_oacc_kernels_region (ctx)) + lower_oacc_head_tail (gimple_location (stmt), + gimple_omp_for_clauses (stmt), private_marker, + &oacc_head, &oacc_tail, ctx); + } /* Add OpenACC partitioning and reduction markers just before the loop. 
*/ if (oacc_head) @@ -12633,9 +13475,20 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); else bind = gimple_build_bind (NULL, NULL, gimple_bind_block (par_bind)); + + gimple_seq oacc_head = NULL, oacc_tail = NULL; + if (flag_openmp_target == OMP_TARGET_MODE_OMPACC + && gimple_code (stmt) == GIMPLE_OMP_PARALLEL + && ompacc_ctx_p (ctx)) + lower_oacc_head_tail (gimple_location (stmt), clauses, + NULL, &oacc_head, &oacc_tail, + ctx); + gsi_replace (gsi_p, dep_bind ? dep_bind : bind, true); gimple_bind_add_seq (bind, ilist); + gimple_bind_add_seq (bind, oacc_head); gimple_bind_add_stmt (bind, stmt); + gimple_bind_add_seq (bind, oacc_tail); gimple_bind_add_seq (bind, olist); pop_gimplify_context (NULL); @@ -12651,6 +13504,171 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) } } +/* Helper function for lower_omp_target. Converts VAR to something that can + be represented by a POINTER_SIZED_INT_NODE. Any new instructions are + appended to GS. This is used to optimize firstprivate variables, so that + small types (less precision than POINTER_SIZE) do not require additional + data mappings. 
*/
+
+static tree
+convert_to_firstprivate_int (tree var, gimple_seq *gs)
+{
+  tree type = TREE_TYPE (var), new_type = NULL_TREE;
+
+  /* For a by-reference or pointer VAR, load the pointed-to value into a
+     temporary and encode that value instead.  */
+  if (omp_privatize_by_reference (var) || POINTER_TYPE_P (type))
+    {
+      type = TREE_TYPE (type);
+      tree tmp = create_tmp_var (type);
+      gimplify_assign (tmp, build_simple_mem_ref (var), gs);
+      var = tmp;
+    }
+
+  /* Integral and pointer values need only a plain conversion.  */
+  if (INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
+    return fold_convert (pointer_sized_int_node, var);
+
+  gcc_assert (tree_to_uhwi (TYPE_SIZE (type)) <= POINTER_SIZE);
+
+  /* Anything else is reinterpreted as an integer type of the same size
+     (as returned by the type_for_size language hook) before being
+     converted to the pointer-sized integer.  */
+  new_type = lang_hooks.types.type_for_size (tree_to_uhwi (TYPE_SIZE (type)),
+                                             true);
+  tree tmp = create_tmp_var (new_type);
+  var = fold_build1 (VIEW_CONVERT_EXPR, new_type, var);
+  gimplify_assign (tmp, var, gs);
+
+  return fold_convert (pointer_sized_int_node, tmp);
+}
+
+/* Like convert_to_firstprivate_int, but restore the original type.  VAR
+   must be a MEM_REF (asserted); the value decoded is its operand 0 and the
+   type restored is TREE_TYPE (VAR).  If IS_REF is set or ORIG_TYPE is a
+   pointer type, the result is the address of a new temporary holding the
+   converted value; otherwise it is an expression of the restored type.
+   Any new instructions are appended to GS.  */
+
+static tree
+convert_from_firstprivate_int (tree var, tree orig_type, bool is_ref,
+                               gimple_seq *gs)
+{
+  tree type = TREE_TYPE (var);
+  tree new_type = NULL_TREE;
+  tree tmp = NULL_TREE;
+
+  gcc_assert (TREE_CODE (var) == MEM_REF);
+  var = TREE_OPERAND (var, 0);
+
+  if (is_ref || POINTER_TYPE_P (orig_type))
+    {
+      tree_code code = NOP_EXPR;
+
+      /* Real, complex and vector values are reinterpreted rather than
+         value-converted.  */
+      if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == COMPLEX_TYPE
+          || VECTOR_TYPE_P (type))
+        code = VIEW_CONVERT_EXPR;
+
+      if (code == VIEW_CONVERT_EXPR
+          && TYPE_SIZE (type) != TYPE_SIZE (orig_type))
+        {
+          /* The sizes differ, so reinterpret through a pointer to TYPE and
+             load from it.  */
+          tree ptype = build_pointer_type (type);
+          var = fold_build1 (code, ptype, build_fold_addr_expr (var));
+          var = build_simple_mem_ref (var);
+        }
+      else
+        var = fold_build1 (code, type, var);
+
+      /* Materialize the value in a local temporary and hand back its
+         address.  */
+      tree inst = create_tmp_var (type);
+      gimplify_assign (inst, var, gs);
+      var = build_fold_addr_expr (inst);
+
+      return var;
+    }
+
+  /* Integral values need only a plain conversion.  The predicate must be
+     applied to the type: VAR is an expression, never a type node.  */
+  if (INTEGRAL_TYPE_P (type))
+    return fold_convert (type, var);
+
+  gcc_assert (tree_to_uhwi (TYPE_SIZE (type)) <= POINTER_SIZE);
+
+  new_type = lang_hooks.types.type_for_size (tree_to_uhwi (TYPE_SIZE (type)),
+                                             true);
+
+  /* Narrow to an integer of the same size as TYPE, then reinterpret the
+     bits as TYPE.  */
+  tmp = create_tmp_var (new_type);
+  var = fold_convert (new_type, var);
+  gimplify_assign (tmp, var, gs);
+
+  return fold_build1 (VIEW_CONVERT_EXPR, type, tmp);
+}
+
+/* Return the tree that should be passed as the hostaddr for clause C.  If C
+   has no iterators this is EXPR unchanged.  Otherwise EXPR is handed to
+   assign_to_iterator_elems_array together with the clause's iterator and
+   the target statement STMT, and the result is the iterator's element
+   array: its address (as a ptr_type_node value) when it has array type,
+   else the element object itself.  */
+
+static tree
+lower_omp_map_iterator_expr (tree expr, tree c, gomp_target *stmt)
+{
+  if (!OMP_CLAUSE_HAS_ITERATORS (c))
+    return expr;
+
+  tree iterator = OMP_CLAUSE_ITERATORS (c);
+  assign_to_iterator_elems_array (expr, iterator, stmt);
+
+  tree elems = OMP_ITERATORS_ELEMS (iterator);
+  if (TREE_CODE (TREE_TYPE (elems)) == ARRAY_TYPE)
+    return build_fold_addr_expr_with_type (elems, ptr_type_node);
+  else
+    return elems;
+}
+
+/* Return the tree that should be passed as the size for clause C.  If C has
+   no iterators this is SIZE unchanged.  Otherwise SIZE is handed to
+   assign_to_iterator_elems_array (with a final argument of 1) together with
+   the clause's iterator and the target statement STMT, and SIZE_MAX is
+   returned as the clause size.
+   NOTE(review): SIZE_MAX is presumably a marker that the runtime recognizes
+   as "sizes are stored in the iterator array" -- confirm against libgomp.  */
+
+static tree
+lower_omp_map_iterator_size (tree size, tree c, gomp_target *stmt)
+{
+  if (!OMP_CLAUSE_HAS_ITERATORS (c))
+    return size;
+
+  tree iterator = OMP_CLAUSE_ITERATORS (c);
+  assign_to_iterator_elems_array (size, iterator, stmt, 1);
+
+  return size_int (SIZE_MAX);
+}
+
+/* If the element storage of ITERS is pointer-typed, build a call to malloc
+   for omp_iterator_elems_length (count) pointer-sized entries and insert
+   the assignment of its result to that storage immediately before the first
+   '<index> = -1' statement in LOOPS_SEQ.  That statement must exist
+   (asserted).  Does nothing when the element storage is not a pointer.  */
+
+static void
+allocate_omp_iterator_elems (tree iters, gimple_seq loops_seq)
+{
+  tree elems = OMP_ITERATORS_ELEMS (iters);
+  if (!POINTER_TYPE_P (TREE_TYPE (elems)))
+    return;
+  tree arr_length = omp_iterator_elems_length (OMP_ITERATORS_COUNT (iters));
+  tree call = builtin_decl_explicit (BUILT_IN_MALLOC);
+  tree size = fold_build2 (MULT_EXPR, size_type_node, arr_length,
+                           TYPE_SIZE_UNIT (ptr_type_node));
+  tree tmp = build_call_expr (call, 1, size);
+
+  /* Find the first statement '<index> = -1' in the pre-loop statements.  */
+  tree index = OMP_ITERATORS_INDEX (iters);
+  gimple_stmt_iterator gsi;
+  for (gsi = gsi_start (loops_seq); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple *stmt = gsi_stmt (gsi);
+      if (gimple_code (stmt) == GIMPLE_ASSIGN
+          && gimple_assign_lhs (stmt) == index
+          && gimple_assign_rhs1 (stmt) == size_int (-1))
+        break;
+    }
+  gcc_assert (!gsi_end_p (gsi));
+
+  gimple_seq alloc_seq = NULL;
+  gimplify_assign (elems, tmp, &alloc_seq);
+  gsi_insert_seq_before (&gsi, alloc_seq, GSI_SAME_STMT);
+}
+
+/* If the element storage of ITERS is pointer-typed, append to SEQ a call to
+   free on it, followed by a clobber of the variable.  Does nothing when the
+   element storage is not a pointer.  */
+
+static void
+free_omp_iterator_elems (tree iters, gimple_seq *seq)
+{
+  tree elems = OMP_ITERATORS_ELEMS (iters);
+  if (!POINTER_TYPE_P (TREE_TYPE (elems)))
+    return;
+  tree call = builtin_decl_explicit (BUILT_IN_FREE);
+  call = build_call_expr (call, 1, elems);
+  gimplify_and_add (call, seq);
+  tree clobber = build_clobber (TREE_TYPE (elems));
+  gimple_seq_add_stmt (seq, gimple_build_assign (elems, clobber));
+}
+
 /* Lower the GIMPLE_OMP_TARGET in the current statement in GSI_P.  CTX
    holds context information for the directive.  
*/ @@ -12789,6 +13807,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) case GOMP_MAP_DETACH: case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION: case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION: + case GOMP_MAP_TO_GRID: + case GOMP_MAP_FROM_GRID: + case GOMP_MAP_GRID_DIM: + case GOMP_MAP_GRID_STRIDE: break; case GOMP_MAP_IF_PRESENT: case GOMP_MAP_FORCE_ALLOC: @@ -12797,6 +13819,17 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) case GOMP_MAP_FORCE_TOFROM: case GOMP_MAP_FORCE_DEVICEPTR: case GOMP_MAP_DEVICE_RESIDENT: + case GOMP_MAP_NONCONTIG_ARRAY_TO: + case GOMP_MAP_NONCONTIG_ARRAY_FROM: + case GOMP_MAP_NONCONTIG_ARRAY_TOFROM: + case GOMP_MAP_NONCONTIG_ARRAY_FORCE_TO: + case GOMP_MAP_NONCONTIG_ARRAY_FORCE_FROM: + case GOMP_MAP_NONCONTIG_ARRAY_FORCE_TOFROM: + case GOMP_MAP_NONCONTIG_ARRAY_ALLOC: + case GOMP_MAP_NONCONTIG_ARRAY_FORCE_ALLOC: + case GOMP_MAP_NONCONTIG_ARRAY_FORCE_PRESENT: + case GOMP_MAP_DECLARE_ALLOCATE: + case GOMP_MAP_DECLARE_DEALLOCATE: case GOMP_MAP_LINK: case GOMP_MAP_FORCE_DETACH: gcc_assert (is_gimple_omp_oacc (stmt)); @@ -12805,6 +13838,21 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gcc_unreachable (); } #endif + if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_TO_GRID + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FROM_GRID) + { + tree nc = OMP_CLAUSE_CHAIN (c); + gcc_assert (OMP_CLAUSE_CODE (nc) == OMP_CLAUSE_MAP + && OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_TO_PSET); + c = nc; + while ((nc = OMP_CLAUSE_CHAIN (c)) + && OMP_CLAUSE_CODE (nc) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_GRID_DIM + || OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_GRID_STRIDE)) + c = nc; + map_cnt += 2; + continue; + } /* FALLTHRU */ case OMP_CLAUSE_TO: case OMP_CLAUSE_FROM: @@ -12872,7 +13920,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) continue; } - if (!maybe_lookup_field (var, ctx)) + if (!maybe_lookup_field (c, ctx)) continue; /* Don't remap compute constructs' reduction variables, 
because the @@ -12881,27 +13929,58 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) && is_gimple_omp_oacc (ctx->stmt) && OMP_CLAUSE_MAP_IN_REDUCTION (c))) { - x = build_receiver_ref (var, true, ctx); + tree var_type = TREE_TYPE (var); tree new_var = lookup_decl (var, ctx); + tree inner_type + = omp_privatize_by_reference (new_var) + ? TREE_TYPE (var_type) : var_type; + bool rcv_by_ref = + (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && GOMP_MAP_NONCONTIG_ARRAY_P (OMP_CLAUSE_MAP_KIND (c)) + && TREE_CODE (var_type) != ARRAY_TYPE + ? false : true); + + x = build_receiver_ref (c, rcv_by_ref, ctx); + + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE + && (FLOAT_TYPE_P (inner_type) + || ANY_INTEGRAL_TYPE_P (inner_type)) + && tree_to_uhwi (TYPE_SIZE (inner_type)) <= POINTER_SIZE + && !maybe_lookup_field_in_outer_ctx (var, ctx)) + { + gcc_assert (is_gimple_omp_oacc (ctx->stmt)); + x = convert_from_firstprivate_int (x, TREE_TYPE (new_var), + omp_privatize_by_reference (var), + &fplist); + gimplify_assign (new_var, x, &fplist); + map_cnt++; + break; + } if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) - && TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE) + && TREE_CODE (var_type) == ARRAY_TYPE) x = build_simple_mem_ref (x); if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); if (omp_privatize_by_reference (new_var) - && (TREE_CODE (TREE_TYPE (new_var)) != POINTER_TYPE - || DECL_BY_REFERENCE (var))) + && (TREE_CODE (var_type) != POINTER_TYPE + || DECL_BY_REFERENCE (var)) + /* Accelerators may not have alloca, so it's not + possible to privatize local storage for those + objects. */ + && TREE_CONSTANT (TYPE_SIZE (TREE_TYPE (var_type)))) { /* Create a local object to hold the instance value. 
*/ - tree type = TREE_TYPE (TREE_TYPE (new_var)); const char *id = IDENTIFIER_POINTER (DECL_NAME (new_var)); - tree inst = create_tmp_var (type, id); - gimplify_assign (inst, fold_indirect_ref (x), &fplist); + tree inst = create_tmp_var (TREE_TYPE (var_type), id); + if (TREE_CODE (var_type) == POINTER_TYPE) + gimplify_assign (inst, x, &fplist); + else + gimplify_assign (inst, fold_indirect_ref (x), &fplist); x = build_fold_addr_expr (inst); } gimplify_assign (new_var, x, &fplist); @@ -13054,6 +14133,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) record_vars_into (gimple_bind_vars (tgt_bind), child_fn); } + auto_vec<tree> new_iterators; + if (ctx->record_type) { if (deep_map_cnt && TREE_CODE (deep_map_cnt) == INTEGER_CST) @@ -13151,11 +14232,14 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) vec_alloc (vkind, map_cnt); unsigned int map_idx = 0; + vec<tree> nca_descrs = vNULL; + for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c)) switch (OMP_CLAUSE_CODE (c)) { tree ovar, nc, s, purpose, var, x, type; unsigned int talign; + bool oacc_firstprivate_int; default: break; @@ -13164,6 +14248,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) case OMP_CLAUSE_TO: case OMP_CLAUSE_FROM: oacc_firstprivate_map: + oacc_firstprivate_int = false; nc = c; ovar = OMP_CLAUSE_DECL (c); if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP @@ -13187,9 +14272,337 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) TREE_VEC_ELT (t, 1), TREE_VEC_ELT (t, 2), deep_map_offset_data, - deep_map_offset, &ilist); + deep_map_offset, &ilist, + &new_iterators); + } + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_TO_GRID + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FROM_GRID)) + { + tree decl = OMP_CLAUSE_DECL (c); + tree dn = OMP_CLAUSE_CHAIN (c); + gcc_assert (OMP_CLAUSE_CODE (dn) == OMP_CLAUSE_MAP + && OMP_CLAUSE_MAP_KIND (dn) == GOMP_MAP_TO_PSET); + tree desc = OMP_CLAUSE_DECL (dn); + + tree oc, elsize 
= OMP_CLAUSE_SIZE (c); + tree type = TREE_TYPE (decl); + int i, dims = 0; + auto_vec<tree> tdims; + bool pointer_based = false, handled_pointer_section = false; + tree arrsize = size_one_node; + + /* Allow a single (maybe strided) array section if we have a + pointer base. */ + if (TREE_CODE (decl) == INDIRECT_REF + && (TREE_CODE (TREE_TYPE (TREE_OPERAND (decl, 0))) + == POINTER_TYPE)) + { + pointer_based = true; + dims = 1; + } + else + /* NOTE: Don't treat (e.g. Fortran, fixed-length) strings as + array types here; array section syntax isn't applicable to + strings. */ + for (tree itype = type; + TREE_CODE (itype) == ARRAY_TYPE + && !TYPE_STRING_FLAG (itype); + itype = TREE_TYPE (itype)) + { + tdims.safe_push (itype); + dims++; + } + + unsigned tdim = 0; + + vec<constructor_elt, va_gc> *vdim; + vec<constructor_elt, va_gc> *vindex; + vec<constructor_elt, va_gc> *vlen; + vec<constructor_elt, va_gc> *vstride; + vec_alloc (vdim, dims); + vec_alloc (vindex, dims); + vec_alloc (vlen, dims); + vec_alloc (vstride, dims); + + tree size_arr_type + = build_array_type_nelts (size_type_node, dims); + + tree dim_tmp = create_tmp_var (size_arr_type, ".omp_dim"); + DECL_NAMELESS (dim_tmp) = 1; + TREE_ADDRESSABLE (dim_tmp) = 1; + TREE_STATIC (dim_tmp) = 1; + tree index_tmp = create_tmp_var (size_arr_type, ".omp_index"); + DECL_NAMELESS (index_tmp) = 1; + TREE_ADDRESSABLE (index_tmp) = 1; + TREE_STATIC (index_tmp) = 1; + tree len_tmp = create_tmp_var (size_arr_type, ".omp_len"); + DECL_NAMELESS (len_tmp) = 1; + TREE_ADDRESSABLE (len_tmp) = 1; + TREE_STATIC (len_tmp) = 1; + tree stride_tmp = create_tmp_var (size_arr_type, ".omp_stride"); + DECL_NAMELESS (stride_tmp) = 1; + TREE_ADDRESSABLE (stride_tmp) = 1; + TREE_STATIC (stride_tmp) = 1; + + oc = c; + c = dn; + + tree span = NULL_TREE; + + for (i = 0; i < dims; i++) + { + nc = OMP_CLAUSE_CHAIN (c); + tree dim = NULL_TREE, index = NULL_TREE, len = NULL_TREE, + stride = size_one_node; + + if (nc + && OMP_CLAUSE_CODE (nc) == 
OMP_CLAUSE_MAP + && OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_GRID_DIM) + { + index = OMP_CLAUSE_DECL (nc); + len = OMP_CLAUSE_SIZE (nc); + + index = fold_convert (sizetype, index); + len = fold_convert (sizetype, len); + + tree nc2 = OMP_CLAUSE_CHAIN (nc); + if (nc2 + && OMP_CLAUSE_CODE (nc2) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (nc2) + == GOMP_MAP_GRID_STRIDE)) + { + stride = OMP_CLAUSE_DECL (nc2); + stride = fold_convert (sizetype, stride); + if (OMP_CLAUSE_SIZE (nc2)) + { + /* If the element size is not the same as the + distance between two adjacent array + elements (in the innermost dimension), + retrieve the latter value ("span") from the + size field of the stride. We only expect to + see one such field per array. */ + gcc_assert (!span); + span = OMP_CLAUSE_SIZE (nc2); + span = fold_convert (sizetype, span); + } + nc = nc2; + } + + if (tdim < tdims.length ()) + { + /* We have an array shape -- use that to find the + total size of the data on the target to look up + in libgomp. */ + tree dtype = TYPE_DOMAIN (tdims[tdim]); + tree minval = TYPE_MIN_VALUE (dtype); + tree maxval = TYPE_MAX_VALUE (dtype); + minval = fold_convert (sizetype, minval); + maxval = fold_convert (sizetype, maxval); + dim = size_binop (MINUS_EXPR, maxval, minval); + dim = size_binop (PLUS_EXPR, dim, + size_one_node); + arrsize = size_binop (MULT_EXPR, arrsize, dim); + } + else if (pointer_based && !handled_pointer_section) + { + /* Use the selected array section to determine the + size of the array. 
*/ + tree tmp = size_binop (MULT_EXPR, len, stride); + tmp = size_binop (MINUS_EXPR, tmp, stride); + tmp = size_binop (PLUS_EXPR, tmp, size_one_node); + dim = size_binop (PLUS_EXPR, index, tmp); + arrsize = size_binop (MULT_EXPR, arrsize, dim); + handled_pointer_section = true; + } + else + { + if (pointer_based) + error_at (OMP_CLAUSE_LOCATION (c), + "too many array section specifiers " + "for pointer-based array"); + else + error_at (OMP_CLAUSE_LOCATION (c), + "too many array section specifiers " + "for array"); + dim = index = len = stride = error_mark_node; + } + tdim++; + + c = nc; + } + else + { + /* We have more array dimensions than array section + specifiers. Copy the whole span. */ + tree dtype = TYPE_DOMAIN (tdims[tdim]); + tree minval = TYPE_MIN_VALUE (dtype); + tree maxval = TYPE_MAX_VALUE (dtype); + minval = fold_convert (sizetype, minval); + maxval = fold_convert (sizetype, maxval); + dim = size_binop (MINUS_EXPR, maxval, minval); + dim = size_binop (PLUS_EXPR, dim, size_one_node); + len = dim; + index = minval; + nc = c; + } + + if (TREE_CODE (dim) != INTEGER_CST) + TREE_STATIC (dim_tmp) = 0; + + if (TREE_CODE (index) != INTEGER_CST) + TREE_STATIC (index_tmp) = 0; + + if (TREE_CODE (len) != INTEGER_CST) + TREE_STATIC (len_tmp) = 0; + + if (TREE_CODE (stride) != INTEGER_CST) + TREE_STATIC (stride_tmp) = 0; + + tree cidx = size_int (i); + CONSTRUCTOR_APPEND_ELT (vdim, cidx, dim); + CONSTRUCTOR_APPEND_ELT (vindex, cidx, index); + CONSTRUCTOR_APPEND_ELT (vlen, cidx, len); + CONSTRUCTOR_APPEND_ELT (vstride, cidx, stride); + } + + tree bias = size_zero_node; + tree volume = size_one_node; + tree enclosure = size_one_node; + for (i = dims - 1; i >= 0; i--) + { + tree dim = (*vdim)[i].value; + tree index = (*vindex)[i].value; + tree stride = (*vstride)[i].value; + tree len = (*vlen)[i].value; + + /* For the bias we want, e.g.: + + index[0] * stride[0] * dim[1] * dim[2] + + index[1] * stride[1] * dim[2] + + index[2] * stride[2] + + All multiplied by "span" 
(or "elsize"). */ + + tree index_stride = size_binop (MULT_EXPR, index, stride); + bias = size_binop (PLUS_EXPR, bias, + size_binop (MULT_EXPR, volume, + index_stride)); + volume = size_binop (MULT_EXPR, volume, dim); + + if (i == 0) + { + tree elems_covered = size_binop (MINUS_EXPR, len, + size_one_node); + elems_covered = size_binop (MULT_EXPR, elems_covered, + stride); + elems_covered = size_binop (PLUS_EXPR, elems_covered, + size_one_node); + enclosure = size_binop (MULT_EXPR, enclosure, + elems_covered); + } + else + enclosure = volume; + } + + /* If we don't have a separate span size, use the element size + instead. */ + if (!span) + span = fold_convert (sizetype, elsize); + + /* The size of a volume enclosing the elements to be + transferred. */ + OMP_CLAUSE_SIZE (oc) = size_binop (MULT_EXPR, enclosure, span); + /* And the bias of the first element we will update. */ + OMP_CLAUSE_SIZE (dn) = size_binop (MULT_EXPR, bias, span); + + tree cdim = build_constructor (size_arr_type, vdim); + tree cindex = build_constructor (size_arr_type, vindex); + tree clen = build_constructor (size_arr_type, vlen); + tree cstride = build_constructor (size_arr_type, vstride); + + if (TREE_STATIC (dim_tmp)) + DECL_INITIAL (dim_tmp) = cdim; + else + gimplify_assign (dim_tmp, cdim, &ilist); + + if (TREE_STATIC (index_tmp)) + DECL_INITIAL (index_tmp) = cindex; + else + gimplify_assign (index_tmp, cindex, &ilist); + + if (TREE_STATIC (len_tmp)) + DECL_INITIAL (len_tmp) = clen; + else + gimplify_assign (len_tmp, clen, &ilist); + + if (TREE_STATIC (stride_tmp)) + DECL_INITIAL (stride_tmp) = cstride; + else + gimplify_assign (stride_tmp, cstride, &ilist); + + tree desc_type = TREE_TYPE (desc); + + tree ndims_field = TYPE_FIELDS (desc_type); + tree elemsize_field = DECL_CHAIN (ndims_field); + tree span_field = DECL_CHAIN (elemsize_field); + tree dim_field = DECL_CHAIN (span_field); + tree index_field = DECL_CHAIN (dim_field); + tree len_field = DECL_CHAIN (index_field); + tree 
stride_field = DECL_CHAIN (len_field); + + vec<constructor_elt, va_gc> *v; + vec_alloc (v, 7); + + bool all_static = (TREE_STATIC (dim_tmp) + && TREE_STATIC (index_tmp) + && TREE_STATIC (len_tmp) + && TREE_STATIC (stride_tmp)); + + dim_tmp = build4 (ARRAY_REF, sizetype, dim_tmp, size_zero_node, + NULL_TREE, NULL_TREE); + dim_tmp = build_fold_addr_expr (dim_tmp); + + /* TODO: we could skip all-zeros index. */ + index_tmp = build4 (ARRAY_REF, sizetype, index_tmp, + size_zero_node, NULL_TREE, NULL_TREE); + index_tmp = build_fold_addr_expr (index_tmp); + + len_tmp = build4 (ARRAY_REF, sizetype, len_tmp, size_zero_node, + NULL_TREE, NULL_TREE); + len_tmp = build_fold_addr_expr (len_tmp); + + /* TODO: we could skip all-ones stride. */ + stride_tmp = build4 (ARRAY_REF, sizetype, stride_tmp, + size_zero_node, NULL_TREE, NULL_TREE); + stride_tmp = build_fold_addr_expr (stride_tmp); + + elsize = fold_convert (sizetype, elsize); + tree ndims = size_int (dims); + + CONSTRUCTOR_APPEND_ELT (v, ndims_field, ndims); + CONSTRUCTOR_APPEND_ELT (v, elemsize_field, elsize); + CONSTRUCTOR_APPEND_ELT (v, span_field, span); + CONSTRUCTOR_APPEND_ELT (v, dim_field, dim_tmp); + CONSTRUCTOR_APPEND_ELT (v, index_field, index_tmp); + CONSTRUCTOR_APPEND_ELT (v, len_field, len_tmp); + CONSTRUCTOR_APPEND_ELT (v, stride_field, stride_tmp); + + tree desc_ctor = build_constructor (desc_type, v); + + if (all_static) + { + TREE_STATIC (desc) = 1; + DECL_INITIAL (desc) = desc_ctor; + } + else + gimplify_assign (desc, desc_ctor, &ilist); + + OMP_CLAUSE_CHAIN (dn) = OMP_CLAUSE_CHAIN (nc); + c = oc; + nc = c; } - if (!DECL_P (ovar)) + else if (!DECL_P (ovar)) { if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)) @@ -13201,7 +14614,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } else { - tree x = build_sender_ref (ovar, ctx); + tree x = build_sender_ref (c, ctx); tree v = ovar; if (in_reduction_clauses && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP @@ 
-13234,6 +14647,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) *p = build_fold_indirect_ref (nd); } v = build_fold_addr_expr_with_type (v, ptr_type_node); + v = lower_omp_map_iterator_expr (v, c, stmt); gimplify_assign (x, v, &ilist); nc = NULL_TREE; } @@ -13249,7 +14663,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gcc_assert (DECL_P (ovar2)); ovar = ovar2; } - if (!maybe_lookup_field (ovar, ctx) + if (!maybe_lookup_field (c, ctx) && !(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH))) @@ -13299,26 +14713,70 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } else if (nc) { - x = build_sender_ref (ovar, ctx); + x = build_sender_ref (nc, ctx); if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) - && TREE_CODE (TREE_TYPE (ovar)) == ARRAY_TYPE) + && TREE_CODE (TREE_TYPE (ovar)) == ARRAY_TYPE + && offloaded) { - gcc_assert (offloaded); - tree avar - = create_tmp_var (TREE_TYPE (TREE_TYPE (x))); - mark_addressable (avar); - gimplify_assign (avar, build_fold_addr_expr (var), &ilist); - talign = DECL_ALIGN_UNIT (avar); + tree avar = build_fold_addr_expr (var); + if (!OMP_CLAUSE_ITERATORS (c)) + { + tree tmp = create_tmp_var (TREE_TYPE (TREE_TYPE (x))); + mark_addressable (tmp); + gimplify_assign (tmp, avar, &ilist); + avar = tmp; + } + talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (x))); avar = build_fold_addr_expr (avar); + avar = lower_omp_map_iterator_expr (avar, c, stmt); gimplify_assign (x, avar, &ilist); } + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && GOMP_MAP_NONCONTIG_ARRAY_P (OMP_CLAUSE_MAP_KIND (c))) + { + int dim_num; + tree dimensions = OMP_CLAUSE_SIZE (c); + + tree array_descr_type = + create_noncontig_array_descr_type (dimensions, &dim_num); + tree array_descr = + create_tmp_var_raw (array_descr_type, + 
".omp_noncontig_array_descr"); + TREE_ADDRESSABLE (array_descr) = 1; + TREE_STATIC (array_descr) = 1; + gimple_add_tmp_var (array_descr); + + create_noncontig_array_descr_init_code + (array_descr, ovar, dimensions, dim_num, &ilist); + nca_descrs.safe_push (build_fold_addr_expr (array_descr)); + + gimplify_assign (x, (TREE_CODE (TREE_TYPE (ovar)) == ARRAY_TYPE + ? build_fold_addr_expr (ovar) : ovar), + &ilist); + } else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { - gcc_assert (is_gimple_omp_oacc (ctx->stmt)); - if (!omp_privatize_by_reference (var)) + gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); + tree new_var = lookup_decl (var, ctx); + tree type = TREE_TYPE (var); + tree inner_type + = omp_privatize_by_reference (new_var) + ? TREE_TYPE (type) : type; + if ((FLOAT_TYPE_P (inner_type) + || ANY_INTEGRAL_TYPE_P (inner_type)) + && tree_to_uhwi (TYPE_SIZE (inner_type)) <= POINTER_SIZE + && !maybe_lookup_field_in_outer_ctx (var, ctx)) + { + oacc_firstprivate_int = true; + if (is_gimple_reg (var) + && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c)) + TREE_NO_WARNING (var) = 1; + var = convert_to_firstprivate_int (var, &ilist); + } + else if (!omp_privatize_by_reference (var)) { if (is_gimple_reg (var) && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c)) @@ -13381,17 +14839,26 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); - s = TREE_TYPE (ovar); - if (TREE_CODE (s) == REFERENCE_TYPE - || omp_check_optional_argument (ovar, false)) - s = TREE_TYPE (s); - s = TYPE_SIZE_UNIT (s); + if (oacc_firstprivate_int) + s = size_int (0); + else + { + s = TREE_TYPE (ovar); + if (TREE_CODE (s) == REFERENCE_TYPE + || omp_check_optional_argument (ovar, false)) + s = TREE_TYPE (s); + s = TYPE_SIZE_UNIT (s); + } } + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && GOMP_MAP_NONCONTIG_ARRAY_P (OMP_CLAUSE_MAP_KIND (c))) + s = NULL_TREE; else s = OMP_CLAUSE_SIZE (c); 
if (s == NULL_TREE) s = TYPE_SIZE_UNIT (TREE_TYPE (ovar)); s = fold_convert (size_type_node, s); + s = lower_omp_map_iterator_size (s, c, stmt); purpose = size_int (map_idx++); CONSTRUCTOR_APPEND_ELT (vsize, purpose, s); if (TREE_CODE (s) != INTEGER_CST) @@ -13452,7 +14919,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) break; case OMP_CLAUSE_FIRSTPRIVATE: gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); - tkind = GOMP_MAP_TO; + if (oacc_firstprivate_int) + tkind = GOMP_MAP_FIRSTPRIVATE_INT; + else + tkind = GOMP_MAP_TO; tkind_zero = tkind; break; case OMP_CLAUSE_TO: @@ -13750,6 +15220,19 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gcc_assert (map_idx == map_cnt); + unsigned nca_num = nca_descrs.length (); + if (nca_num > 0) + { + tree nca, t = gimple_omp_target_data_arg (stmt); + int i, oldlen = TREE_VEC_LENGTH (t); + tree nt = make_tree_vec (oldlen + nca_num); + for (i = 0; i < oldlen; i++) + TREE_VEC_ELT (nt, i) = TREE_VEC_ELT (t, i); + for (i = 0; nca_descrs.iterate (i, &nca); i++) + TREE_VEC_ELT (nt, oldlen + i) = nca; + gimple_omp_target_set_data_arg (stmt, nt); + } + if (!deep_map_cnt) { DECL_INITIAL (TREE_VEC_ELT (t, 1)) @@ -14209,7 +15692,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) type = TREE_TYPE (type); ref_to_ptr = true; } - x = build_receiver_ref (OMP_CLAUSE_DECL (prev), false, ctx); + x = build_receiver_ref (prev, false, ctx); x = fold_convert_loc (clause_loc, type, x); if (!integer_zerop (OMP_CLAUSE_SIZE (c))) { @@ -14225,6 +15708,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (ref_to_array) x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x); gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue); + if (OMP_CLAUSE_MAP_POINTS_TO_READONLY (c) && VAR_P (x)) + VAR_POINTS_TO_READONLY (x) = 1; if ((is_ref && !ref_to_array) || ref_to_ptr) { @@ -14235,8 +15720,29 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) 
gimple_build_assign (t, x)); x = build_fold_addr_expr_loc (clause_loc, t); } - gimple_seq_add_stmt (&new_body, - gimple_build_assign (new_var, x)); + if (offloaded && is_gimple_omp_oacc (ctx->stmt) + && OMP_CLAUSE_MAP_IN_REDUCTION (prev) + && TREE_CODE (type) == POINTER_TYPE + && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE + && !TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (type)))) + { + tree array_type = TREE_TYPE (type); + tree atmp + = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN); + tree al = size_int (TYPE_ALIGN (array_type)); + tree sz = TYPE_SIZE_UNIT (array_type); + tree call = build_call_expr_loc (OMP_CLAUSE_LOCATION (c), + atmp, 2, sz, al); + gimplify_assign (x, call, &new_body); + + /* In some cases, we need to preserve the pointer to array + type, as it will be passed into OpenACC reduction + internal-fns, and we require the type for proper copy + generation. */ + TREE_TYPE (x) = TREE_TYPE (new_var); + } + gimple *g = gimple_build_assign (new_var, x); + gimple_seq_add_stmt (&new_body, g); prev = NULL_TREE; } else if (OMP_CLAUSE_CHAIN (c) @@ -14296,7 +15802,9 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq fork_seq = NULL; gimple_seq join_seq = NULL; - if (offloaded && is_gimple_omp_oacc (ctx->stmt)) + if (offloaded && (is_gimple_omp_oacc (ctx->stmt) + || (flag_openmp_target == OMP_TARGET_MODE_OMPACC + && ompacc_ctx_p (ctx)))) { /* If there are reductions on the offloaded region itself, treat them as a dummy GANG loop. 
*/ @@ -14324,6 +15832,23 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_omp_set_body (stmt, new_body); } + for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_HAS_ITERATORS (c)) + allocate_omp_iterator_elems (OMP_CLAUSE_ITERATORS (c), + gimple_omp_target_iterator_loops (stmt)); + unsigned i; + tree it; + FOR_EACH_VEC_ELT (new_iterators, i, it) + allocate_omp_iterator_elems (it, gimple_omp_target_iterator_loops (stmt)); + gsi_insert_seq_before (gsi_p, gimple_omp_target_iterator_loops (stmt), + GSI_SAME_STMT); + gimple_omp_target_set_iterator_loops (stmt, NULL); + for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_HAS_ITERATORS (c)) + free_omp_iterator_elems (OMP_CLAUSE_ITERATORS (c), &olist); + FOR_EACH_VEC_ELT (new_iterators, i, it) + free_omp_iterator_elems (it, &olist); + bind = gimple_build_bind (NULL, NULL, tgt_bind ? gimple_bind_block (tgt_bind) : NULL_TREE); @@ -14635,6 +16160,22 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx) lower_omp (gimple_omp_body_ptr (teams_stmt), ctx); lower_reduction_clauses (gimple_omp_teams_clauses (teams_stmt), &olist, NULL, ctx); + + if (flag_openmp_target == OMP_TARGET_MODE_OMPACC && ompacc_ctx_p (ctx)) + { + /* Forward the team/gang-wide variables to outer target region. 
*/ + struct omp_context *tgt = ctx; + while (tgt && !is_gimple_omp_offloaded (tgt->stmt)) + tgt = tgt->outer; + if (tgt) + { + int i; + tree decl; + FOR_EACH_VEC_ELT (ctx->oacc_privatization_candidates, i, decl) + tgt->oacc_privatization_candidates.safe_push (decl); + } + } + gimple_seq_add_stmt (&bind_body, teams_stmt); gimple_seq_add_seq (&bind_body, gimple_omp_body (teams_stmt)); @@ -14802,7 +16343,9 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx) ctx); break; case GIMPLE_BIND: - if (ctx && is_gimple_omp_oacc (ctx->stmt)) + if (ctx && (is_gimple_omp_oacc (ctx->stmt) + || (flag_openmp_target == OMP_TARGET_MODE_OMPACC + && ompacc_ctx_p (ctx)))) { tree vars = gimple_bind_vars (as_a <gbind *> (stmt)); oacc_privatization_scan_decl_chain (ctx, vars); @@ -15044,6 +16587,68 @@ lower_omp (gimple_seq *body, omp_context *ctx) input_location = saved_location; } +/* Emit a constructor function to enable -foffload-memory=pinned + at runtime. Libgomp handles the OS mode setting, but we need to trigger + it by calling GOMP_enable_pinned_mode before the program proper runs. 
*/ + +static void +omp_enable_pinned_mode () +{ + static bool visited = false; + if (visited) + return; + visited = true; + + /* Create a new function like this: + + static void __attribute__((constructor)) + __set_pinned_mode () + { + GOMP_enable_pinned_mode (); + } + */ + + tree name = get_identifier ("__set_pinned_mode"); + tree voidfntype = build_function_type_list (void_type_node, NULL_TREE); + tree decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name, voidfntype); + + TREE_STATIC (decl) = 1; + TREE_USED (decl) = 1; + DECL_ARTIFICIAL (decl) = 1; + DECL_IGNORED_P (decl) = 0; + TREE_PUBLIC (decl) = 0; + DECL_UNINLINABLE (decl) = 1; + DECL_EXTERNAL (decl) = 0; + DECL_CONTEXT (decl) = NULL_TREE; + DECL_INITIAL (decl) = make_node (BLOCK); + BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl; + DECL_STATIC_CONSTRUCTOR (decl) = 1; + DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("constructor"), + NULL_TREE, NULL_TREE); + + tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, + void_type_node); + DECL_ARTIFICIAL (t) = 1; + DECL_IGNORED_P (t) = 1; + DECL_CONTEXT (t) = decl; + DECL_RESULT (decl) = t; + + push_struct_function (decl); + init_tree_ssa (cfun); + + tree calldecl = builtin_decl_explicit (BUILT_IN_GOMP_ENABLE_PINNED_MODE); + gcall *call = gimple_build_call (calldecl, 0); + + gimple_seq seq = NULL; + gimple_seq_add_stmt (&seq, call); + gimple_set_body (decl, gimple_build_bind (NULL_TREE, seq, NULL)); + + cfun->function_end_locus = UNKNOWN_LOCATION; + cfun->curr_properties |= PROP_gimple_any; + pop_cfun (); + cgraph_node::add_new_function (decl, true); +} + /* Main entry point. */ static unsigned int @@ -15100,6 +16705,10 @@ execute_lower_omp (void) for (auto task_stmt : task_cpyfns) finalize_task_copyfn (task_stmt); task_cpyfns.release (); + + if (flag_offload_memory == OFFLOAD_MEMORY_PINNED) + omp_enable_pinned_mode (); + return 0; } |