diff options
Diffstat (limited to 'gcc/omp-general.c')
-rw-r--r-- | gcc/omp-general.c | 842 |
1 file changed, 786 insertions, 56 deletions
diff --git a/gcc/omp-general.c b/gcc/omp-general.c index 49023f4..8e2665a 100644 --- a/gcc/omp-general.c +++ b/gcc/omp-general.c @@ -39,9 +39,9 @@ along with GCC; see the file COPYING3. If not see #include "cgraph.h" #include "alloc-pool.h" #include "symbol-summary.h" -#include "hsa-common.h" #include "tree-pass.h" #include "omp-device-properties.h" +#include "tree-iterator.h" enum omp_requires omp_requires_mask; @@ -200,14 +200,20 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, fd->have_pointer_condtemp = false; fd->have_scantemp = false; fd->have_nonctrl_scantemp = false; + fd->non_rect = false; fd->lastprivate_conditional = 0; fd->tiling = NULL_TREE; fd->collapse = 1; fd->ordered = 0; + fd->first_nonrect = -1; + fd->last_nonrect = -1; fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; fd->sched_modifiers = 0; fd->chunk_size = NULL_TREE; fd->simd_schedule = false; + fd->first_inner_iterations = NULL_TREE; + fd->factor = NULL_TREE; + fd->adjn1 = NULL_TREE; collapse_iter = NULL; collapse_count = NULL; @@ -311,6 +317,44 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, } int cnt = fd->ordered ? 
fd->ordered : fd->collapse; + int single_nonrect = -1; + tree single_nonrect_count = NULL_TREE; + enum tree_code single_nonrect_cond_code = ERROR_MARK; + for (i = 1; i < cnt; i++) + { + tree n1 = gimple_omp_for_initial (for_stmt, i); + tree n2 = gimple_omp_for_final (for_stmt, i); + if (TREE_CODE (n1) == TREE_VEC) + { + if (fd->non_rect) + { + single_nonrect = -1; + break; + } + for (int j = i - 1; j >= 0; j--) + if (TREE_VEC_ELT (n1, 0) == gimple_omp_for_index (for_stmt, j)) + { + single_nonrect = j; + break; + } + fd->non_rect = true; + } + else if (TREE_CODE (n2) == TREE_VEC) + { + if (fd->non_rect) + { + single_nonrect = -1; + break; + } + for (int j = i - 1; j >= 0; j--) + if (TREE_VEC_ELT (n2, 0) == gimple_omp_for_index (for_stmt, j)) + { + single_nonrect = j; + break; + } + fd->non_rect = true; + } + } for (i = 0; i < cnt; i++) { if (i == 0 @@ -329,12 +373,56 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE); var = TREE_CODE (loop->v) == SSA_NAME ? 
SSA_NAME_VAR (loop->v) : loop->v; loop->n1 = gimple_omp_for_initial (for_stmt, i); + loop->m1 = NULL_TREE; + loop->m2 = NULL_TREE; + loop->outer = 0; + loop->non_rect_referenced = false; + if (TREE_CODE (loop->n1) == TREE_VEC) + { + for (int j = i - 1; j >= 0; j--) + if (TREE_VEC_ELT (loop->n1, 0) == gimple_omp_for_index (for_stmt, j)) + { + loop->outer = i - j; + if (loops != NULL) + loops[j].non_rect_referenced = true; + if (fd->first_nonrect == -1 || fd->first_nonrect > j) + fd->first_nonrect = j; + break; + } + gcc_assert (loop->outer); + loop->m1 = TREE_VEC_ELT (loop->n1, 1); + loop->n1 = TREE_VEC_ELT (loop->n1, 2); + fd->non_rect = true; + fd->last_nonrect = i; + } loop->cond_code = gimple_omp_for_cond (for_stmt, i); loop->n2 = gimple_omp_for_final (for_stmt, i); gcc_assert (loop->cond_code != NE_EXPR || (gimple_omp_for_kind (for_stmt) != GF_OMP_FOR_KIND_OACC_LOOP)); + if (TREE_CODE (loop->n2) == TREE_VEC) + { + if (loop->outer) + gcc_assert (TREE_VEC_ELT (loop->n2, 0) + == gimple_omp_for_index (for_stmt, i - loop->outer)); + else + for (int j = i - 1; j >= 0; j--) + if (TREE_VEC_ELT (loop->n2, 0) == gimple_omp_for_index (for_stmt, j)) + { + loop->outer = i - j; + if (loops != NULL) + loops[j].non_rect_referenced = true; + if (fd->first_nonrect == -1 || fd->first_nonrect > j) + fd->first_nonrect = j; + break; + } + gcc_assert (loop->outer); + loop->m2 = TREE_VEC_ELT (loop->n2, 1); + loop->n2 = TREE_VEC_ELT (loop->n2, 2); + fd->non_rect = true; + fd->last_nonrect = i; + } t = gimple_omp_for_incr (for_stmt, i); gcc_assert (TREE_OPERAND (t, 0) == var); @@ -371,7 +459,9 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, loop->n2, loop->step); else n = loop->n1; - if (TREE_CODE (n) != INTEGER_CST + if (loop->m1 + || loop->m2 + || TREE_CODE (n) != INTEGER_CST || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n)) iter_type = long_long_unsigned_type_node; } @@ -392,7 +482,9 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, 
loop->n2, loop->step); n2 = loop->n1; } - if (TREE_CODE (n1) != INTEGER_CST + if (loop->m1 + || loop->m2 + || TREE_CODE (n1) != INTEGER_CST || TREE_CODE (n2) != INTEGER_CST || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1) || !tree_int_cst_lt (n2, TYPE_MAX_VALUE (iter_type))) @@ -405,9 +497,214 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, if (collapse_count && *collapse_count == NULL) { - t = fold_binary (loop->cond_code, boolean_type_node, - fold_convert (TREE_TYPE (loop->v), loop->n1), - fold_convert (TREE_TYPE (loop->v), loop->n2)); + if (count && integer_zerop (count)) + continue; + tree n1first = NULL_TREE, n2first = NULL_TREE; + tree n1last = NULL_TREE, n2last = NULL_TREE; + tree ostep = NULL_TREE; + if (loop->m1 || loop->m2) + { + if (count == NULL_TREE) + continue; + if (single_nonrect == -1 + || (loop->m1 && TREE_CODE (loop->m1) != INTEGER_CST) + || (loop->m2 && TREE_CODE (loop->m2) != INTEGER_CST) + || TREE_CODE (loop->n1) != INTEGER_CST + || TREE_CODE (loop->n2) != INTEGER_CST + || TREE_CODE (loop->step) != INTEGER_CST) + { + count = NULL_TREE; + continue; + } + tree var = gimple_omp_for_initial (for_stmt, single_nonrect); + tree itype = TREE_TYPE (var); + tree first = gimple_omp_for_initial (for_stmt, single_nonrect); + t = gimple_omp_for_incr (for_stmt, single_nonrect); + ostep = omp_get_for_step_from_incr (loc, t); + t = fold_binary (MINUS_EXPR, long_long_unsigned_type_node, + single_nonrect_count, + build_one_cst (long_long_unsigned_type_node)); + t = fold_convert (itype, t); + first = fold_convert (itype, first); + ostep = fold_convert (itype, ostep); + tree last = fold_binary (PLUS_EXPR, itype, first, + fold_binary (MULT_EXPR, itype, t, + ostep)); + if (TREE_CODE (first) != INTEGER_CST + || TREE_CODE (last) != INTEGER_CST) + { + count = NULL_TREE; + continue; + } + if (loop->m1) + { + tree m1 = fold_convert (itype, loop->m1); + tree n1 = fold_convert (itype, loop->n1); + n1first = fold_binary (PLUS_EXPR, itype, + 
fold_binary (MULT_EXPR, itype, + first, m1), n1); + n1last = fold_binary (PLUS_EXPR, itype, + fold_binary (MULT_EXPR, itype, + last, m1), n1); + } + else + n1first = n1last = loop->n1; + if (loop->m2) + { + tree n2 = fold_convert (itype, loop->n2); + tree m2 = fold_convert (itype, loop->m2); + n2first = fold_binary (PLUS_EXPR, itype, + fold_binary (MULT_EXPR, itype, + first, m2), n2); + n2last = fold_binary (PLUS_EXPR, itype, + fold_binary (MULT_EXPR, itype, + last, m2), n2); + } + else + n2first = n2last = loop->n2; + n1first = fold_convert (TREE_TYPE (loop->v), n1first); + n2first = fold_convert (TREE_TYPE (loop->v), n2first); + n1last = fold_convert (TREE_TYPE (loop->v), n1last); + n2last = fold_convert (TREE_TYPE (loop->v), n2last); + t = fold_binary (loop->cond_code, boolean_type_node, + n1first, n2first); + tree t2 = fold_binary (loop->cond_code, boolean_type_node, + n1last, n2last); + if (t && t2 && integer_nonzerop (t) && integer_nonzerop (t2)) + /* All outer loop iterators have at least one inner loop + iteration. Try to compute the count at compile time. */ + t = NULL_TREE; + else if (t && t2 && integer_zerop (t) && integer_zerop (t2)) + /* No iterations of the inner loop. count will be set to + zero cst below. */; + else if (TYPE_UNSIGNED (itype) + || t == NULL_TREE + || t2 == NULL_TREE + || TREE_CODE (t) != INTEGER_CST + || TREE_CODE (t2) != INTEGER_CST) + { + /* Punt (for now). */ + count = NULL_TREE; + continue; + } + else + { + /* Some iterations of the outer loop have zero iterations + of the inner loop, while others have at least one. + In this case, we need to adjust one of those outer + loop bounds. If ADJ_FIRST, we need to adjust outer n1 + (first), otherwise outer n2 (last). */ + bool adj_first = integer_zerop (t); + tree n1 = fold_convert (itype, loop->n1); + tree n2 = fold_convert (itype, loop->n2); + tree m1 = loop->m1 ? fold_convert (itype, loop->m1) + : build_zero_cst (itype); + tree m2 = loop->m2 ? 
fold_convert (itype, loop->m2) + : build_zero_cst (itype); + t = fold_binary (MINUS_EXPR, itype, n1, n2); + t2 = fold_binary (MINUS_EXPR, itype, m2, m1); + t = fold_binary (TRUNC_DIV_EXPR, itype, t, t2); + t2 = fold_binary (MINUS_EXPR, itype, t, first); + t2 = fold_binary (TRUNC_MOD_EXPR, itype, t2, ostep); + t = fold_binary (MINUS_EXPR, itype, t, t2); + tree n1cur + = fold_binary (PLUS_EXPR, itype, n1, + fold_binary (MULT_EXPR, itype, m1, t)); + tree n2cur + = fold_binary (PLUS_EXPR, itype, n2, + fold_binary (MULT_EXPR, itype, m2, t)); + t2 = fold_binary (loop->cond_code, boolean_type_node, + n1cur, n2cur); + tree t3 = fold_binary (MULT_EXPR, itype, m1, ostep); + tree t4 = fold_binary (MULT_EXPR, itype, m2, ostep); + tree diff; + if (adj_first) + { + tree new_first; + if (integer_nonzerop (t2)) + { + new_first = t; + n1first = n1cur; + n2first = n2cur; + if (flag_checking) + { + t3 = fold_binary (MINUS_EXPR, itype, n1cur, t3); + t4 = fold_binary (MINUS_EXPR, itype, n2cur, t4); + t3 = fold_binary (loop->cond_code, + boolean_type_node, t3, t4); + gcc_assert (integer_zerop (t3)); + } + } + else + { + t3 = fold_binary (PLUS_EXPR, itype, n1cur, t3); + t4 = fold_binary (PLUS_EXPR, itype, n2cur, t4); + new_first = fold_binary (PLUS_EXPR, itype, t, ostep); + n1first = t3; + n2first = t4; + if (flag_checking) + { + t3 = fold_binary (loop->cond_code, + boolean_type_node, t3, t4); + gcc_assert (integer_nonzerop (t3)); + } + } + diff = fold_binary (MINUS_EXPR, itype, new_first, first); + first = new_first; + fd->adjn1 = first; + } + else + { + tree new_last; + if (integer_zerop (t2)) + { + t3 = fold_binary (MINUS_EXPR, itype, n1cur, t3); + t4 = fold_binary (MINUS_EXPR, itype, n2cur, t4); + new_last = fold_binary (MINUS_EXPR, itype, t, ostep); + n1last = t3; + n2last = t4; + if (flag_checking) + { + t3 = fold_binary (loop->cond_code, + boolean_type_node, t3, t4); + gcc_assert (integer_nonzerop (t3)); + } + } + else + { + new_last = t; + n1last = n1cur; + n2last = n2cur; + if 
(flag_checking) + { + t3 = fold_binary (PLUS_EXPR, itype, n1cur, t3); + t4 = fold_binary (PLUS_EXPR, itype, n2cur, t4); + t3 = fold_binary (loop->cond_code, + boolean_type_node, t3, t4); + gcc_assert (integer_zerop (t3)); + } + } + diff = fold_binary (MINUS_EXPR, itype, last, new_last); + } + if (TYPE_UNSIGNED (itype) + && single_nonrect_cond_code == GT_EXPR) + diff = fold_binary (TRUNC_DIV_EXPR, itype, + fold_unary (NEGATE_EXPR, itype, diff), + fold_unary (NEGATE_EXPR, itype, + ostep)); + else + diff = fold_binary (TRUNC_DIV_EXPR, itype, diff, ostep); + diff = fold_convert (long_long_unsigned_type_node, diff); + single_nonrect_count + = fold_binary (MINUS_EXPR, long_long_unsigned_type_node, + single_nonrect_count, diff); + t = NULL_TREE; + } + } + else + t = fold_binary (loop->cond_code, boolean_type_node, + fold_convert (TREE_TYPE (loop->v), loop->n1), + fold_convert (TREE_TYPE (loop->v), loop->n2)); if (t && integer_zerop (t)) count = build_zero_cst (long_long_unsigned_type_node); else if ((i == 0 || count != NULL_TREE) @@ -421,31 +718,77 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, if (POINTER_TYPE_P (itype)) itype = signed_type_for (itype); t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? 
-1 : 1)); - t = fold_build2_loc (loc, PLUS_EXPR, itype, - fold_convert_loc (loc, itype, loop->step), - t); - t = fold_build2_loc (loc, PLUS_EXPR, itype, t, - fold_convert_loc (loc, itype, loop->n2)); - t = fold_build2_loc (loc, MINUS_EXPR, itype, t, - fold_convert_loc (loc, itype, loop->n1)); - if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR) + t = fold_build2 (PLUS_EXPR, itype, + fold_convert (itype, loop->step), t); + tree n1 = loop->n1; + tree n2 = loop->n2; + if (loop->m1 || loop->m2) { - tree step = fold_convert_loc (loc, itype, loop->step); - t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, - fold_build1_loc (loc, NEGATE_EXPR, - itype, t), - fold_build1_loc (loc, NEGATE_EXPR, - itype, step)); + gcc_assert (single_nonrect != -1); + n1 = n1first; + n2 = n2first; } + t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2)); + t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); + tree step = fold_convert_loc (loc, itype, loop->step); + if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, step)); else - t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, t, - fold_convert_loc (loc, itype, - loop->step)); - t = fold_convert_loc (loc, long_long_unsigned_type_node, t); - if (count != NULL_TREE) - count = fold_build2_loc (loc, MULT_EXPR, - long_long_unsigned_type_node, - count, t); + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); + tree llutype = long_long_unsigned_type_node; + t = fold_convert (llutype, t); + if (loop->m1 || loop->m2) + { + /* t is number of iterations of inner loop at either first + or last value of the outer iterator (the one with fewer + iterations). + Compute t2 = ((m2 - m1) * ostep) / step + and niters = outer_count * t + + t2 * ((outer_count - 1) * outer_count / 2) + */ + tree m1 = loop->m1 ? loop->m1 : integer_zero_node; + tree m2 = loop->m2 ? 
loop->m2 : integer_zero_node; + m1 = fold_convert (itype, m1); + m2 = fold_convert (itype, m2); + tree t2 = fold_build2 (MINUS_EXPR, itype, m2, m1); + t2 = fold_build2 (MULT_EXPR, itype, t2, ostep); + if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR) + t2 = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t2), + fold_build1 (NEGATE_EXPR, itype, step)); + else + t2 = fold_build2 (TRUNC_DIV_EXPR, itype, t2, step); + t2 = fold_convert (llutype, t2); + fd->first_inner_iterations = t; + fd->factor = t2; + t = fold_build2 (MULT_EXPR, llutype, t, + single_nonrect_count); + tree t3 = fold_build2 (MINUS_EXPR, llutype, + single_nonrect_count, + build_one_cst (llutype)); + t3 = fold_build2 (MULT_EXPR, llutype, t3, + single_nonrect_count); + t3 = fold_build2 (TRUNC_DIV_EXPR, llutype, t3, + build_int_cst (llutype, 2)); + t2 = fold_build2 (MULT_EXPR, llutype, t2, t3); + t = fold_build2 (PLUS_EXPR, llutype, t, t2); + } + if (i == single_nonrect) + { + if (integer_zerop (t) || TREE_CODE (t) != INTEGER_CST) + count = t; + else + { + single_nonrect_count = t; + single_nonrect_cond_code = loop->cond_code; + if (count == NULL_TREE) + count = build_one_cst (llutype); + } + } + else if (count != NULL_TREE) + count = fold_build2 (MULT_EXPR, llutype, count, t); else count = t; if (TREE_CODE (count) != INTEGER_CST) @@ -474,7 +817,18 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, if (collapse_count && *collapse_count == NULL) { if (count) - *collapse_count = fold_convert_loc (loc, iter_type, count); + { + *collapse_count = fold_convert_loc (loc, iter_type, count); + if (fd->first_inner_iterations && fd->factor) + { + t = make_tree_vec (4); + TREE_VEC_ELT (t, 0) = *collapse_count; + TREE_VEC_ELT (t, 1) = fd->first_inner_iterations; + TREE_VEC_ELT (t, 2) = fd->factor; + TREE_VEC_ELT (t, 3) = fd->adjn1; + *collapse_count = t; + } + } else *collapse_count = create_tmp_var (iter_type, ".count"); } @@ -484,7 +838,18 @@ omp_extract_for_data 
(gomp_for *for_stmt, struct omp_for_data *fd, fd->loop.v = *collapse_iter; fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0); fd->loop.n2 = *collapse_count; + if (TREE_CODE (fd->loop.n2) == TREE_VEC) + { + gcc_assert (fd->non_rect); + fd->first_inner_iterations = TREE_VEC_ELT (fd->loop.n2, 1); + fd->factor = TREE_VEC_ELT (fd->loop.n2, 2); + fd->adjn1 = TREE_VEC_ELT (fd->loop.n2, 3); + fd->loop.n2 = TREE_VEC_ELT (fd->loop.n2, 0); + } fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1); + fd->loop.m1 = NULL_TREE; + fd->loop.m2 = NULL_TREE; + fd->loop.outer = 0; fd->loop.cond_code = LT_EXPR; } else if (loops) @@ -504,6 +869,61 @@ omp_build_barrier (tree lhs) return g; } +/* Find OMP_FOR resp. OMP_SIMD with non-NULL OMP_FOR_INIT. Also, fill in pdata + array, pdata[0] non-NULL if there is anything non-trivial in between, + pdata[1] is address of OMP_PARALLEL in between if any, pdata[2] is address + of OMP_FOR in between if any and pdata[3] is address of the inner + OMP_FOR/OMP_SIMD. */ + +tree +find_combined_omp_for (tree *tp, int *walk_subtrees, void *data) +{ + tree **pdata = (tree **) data; + *walk_subtrees = 0; + switch (TREE_CODE (*tp)) + { + case OMP_FOR: + if (OMP_FOR_INIT (*tp) != NULL_TREE) + { + pdata[3] = tp; + return *tp; + } + pdata[2] = tp; + *walk_subtrees = 1; + break; + case OMP_SIMD: + if (OMP_FOR_INIT (*tp) != NULL_TREE) + { + pdata[3] = tp; + return *tp; + } + break; + case BIND_EXPR: + if (BIND_EXPR_VARS (*tp) + || (BIND_EXPR_BLOCK (*tp) + && BLOCK_VARS (BIND_EXPR_BLOCK (*tp)))) + pdata[0] = tp; + *walk_subtrees = 1; + break; + case STATEMENT_LIST: + if (!tsi_one_before_end_p (tsi_start (*tp))) + pdata[0] = tp; + *walk_subtrees = 1; + break; + case TRY_FINALLY_EXPR: + pdata[0] = tp; + *walk_subtrees = 1; + break; + case OMP_PARALLEL: + pdata[1] = tp; + *walk_subtrees = 1; + break; + default: + break; + } + return NULL_TREE; +} + /* Return maximum possible vectorization factor for the target. 
*/ poly_uint64 @@ -631,17 +1051,17 @@ omp_offload_device_kind_arch_isa (const char *props, const char *prop) static bool omp_maybe_offloaded (void) { - if (!hsa_gen_requested_p ()) - { - if (!ENABLE_OFFLOADING) - return false; - const char *names = getenv ("OFFLOAD_TARGET_NAMES"); - if (names == NULL || *names == '\0') - return false; - } + if (!ENABLE_OFFLOADING) + return false; + const char *names = getenv ("OFFLOAD_TARGET_NAMES"); + if (names == NULL || *names == '\0') + return false; + if (symtab->state == PARSING) /* Maybe. */ return true; + if (cfun && cfun->after_inlining) + return false; if (current_function_decl && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (current_function_decl))) @@ -694,8 +1114,7 @@ omp_context_selector_matches (tree ctx) (so in most of the cases), and we'd need to maintain set of surrounding OpenMP constructs, which is better handled during gimplification. */ - if (symtab->state == PARSING - || (cfun->curr_properties & PROP_gimple_any) != 0) + if (symtab->state == PARSING) { ret = -1; continue; @@ -704,6 +1123,28 @@ omp_context_selector_matches (tree ctx) enum tree_code constructs[5]; int nconstructs = omp_constructor_traits_to_codes (TREE_VALUE (t1), constructs); + + if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) + { + if (!cfun->after_inlining) + { + ret = -1; + continue; + } + int i; + for (i = 0; i < nconstructs; ++i) + if (constructs[i] == OMP_SIMD) + break; + if (i < nconstructs) + { + ret = -1; + continue; + } + /* If there is no simd, assume it is ok after IPA, + constructs should have been checked before. 
*/ + continue; + } + int r = omp_construct_selector_matches (constructs, nconstructs, NULL); if (r == 0) @@ -738,6 +1179,9 @@ omp_context_selector_matches (tree ctx) case 'a': if (set == 'i' && !strcmp (sel, "atomic_default_mem_order")) { + if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) + break; + enum omp_memory_order omo = ((enum omp_memory_order) (omp_requires_mask @@ -787,12 +1231,6 @@ omp_context_selector_matches (tree ctx) also offloading values. */ if (!omp_maybe_offloaded ()) return 0; - if (strcmp (arch, "hsa") == 0 - && hsa_gen_requested_p ()) - { - ret = -1; - continue; - } if (ENABLE_OFFLOADING) { const char *arches = omp_offload_device_arch; @@ -816,6 +1254,9 @@ omp_context_selector_matches (tree ctx) case 'u': if (set == 'i' && !strcmp (sel, "unified_address")) { + if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) + break; + if ((omp_requires_mask & OMP_REQUIRES_UNIFIED_ADDRESS) == 0) { if (symtab->state == PARSING) @@ -827,6 +1268,9 @@ omp_context_selector_matches (tree ctx) } if (set == 'i' && !strcmp (sel, "unified_shared_memory")) { + if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) + break; + if ((omp_requires_mask & OMP_REQUIRES_UNIFIED_SHARED_MEMORY) == 0) { @@ -841,6 +1285,9 @@ omp_context_selector_matches (tree ctx) case 'd': if (set == 'i' && !strcmp (sel, "dynamic_allocators")) { + if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) + break; + if ((omp_requires_mask & OMP_REQUIRES_DYNAMIC_ALLOCATORS) == 0) { @@ -855,6 +1302,9 @@ omp_context_selector_matches (tree ctx) case 'r': if (set == 'i' && !strcmp (sel, "reverse_offload")) { + if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) + break; + if ((omp_requires_mask & OMP_REQUIRES_REVERSE_OFFLOAD) == 0) { if (symtab->state == PARSING) @@ -901,12 +1351,6 @@ omp_context_selector_matches (tree ctx) also offloading values. 
*/ if (!omp_maybe_offloaded ()) return 0; - if (strcmp (prop, "gpu") == 0 - && hsa_gen_requested_p ()) - { - ret = -1; - continue; - } if (ENABLE_OFFLOADING) { const char *kinds = omp_offload_device_kind; @@ -944,7 +1388,8 @@ omp_context_selector_matches (tree ctx) #pragma omp declare simd on it, some simd clones might have the isa added later on. */ if (r == -1 - && targetm.simd_clone.compute_vecsize_and_simdlen) + && targetm.simd_clone.compute_vecsize_and_simdlen + && (cfun == NULL || !cfun->after_inlining)) { tree attrs = DECL_ATTRIBUTES (current_function_decl); @@ -1415,6 +1860,213 @@ omp_context_compute_score (tree ctx, widest_int *score, bool declare_simd) return ret; } +/* Class describing a single variant. */ +struct GTY(()) omp_declare_variant_entry { + /* NODE of the variant. */ + cgraph_node *variant; + /* Score if not in declare simd clone. */ + widest_int score; + /* Score if in declare simd clone. */ + widest_int score_in_declare_simd_clone; + /* Context selector for the variant. */ + tree ctx; + /* True if the context selector is known to match already. */ + bool matches; +}; + +/* Class describing a function with variants. */ +struct GTY((for_user)) omp_declare_variant_base_entry { + /* NODE of the base function. */ + cgraph_node *base; + /* NODE of the artificial function created for the deferred variant + resolution. */ + cgraph_node *node; + /* Vector of the variants. 
*/ + vec<omp_declare_variant_entry, va_gc> *variants; +}; + +struct omp_declare_variant_hasher + : ggc_ptr_hash<omp_declare_variant_base_entry> { + static hashval_t hash (omp_declare_variant_base_entry *); + static bool equal (omp_declare_variant_base_entry *, + omp_declare_variant_base_entry *); +}; + +hashval_t +omp_declare_variant_hasher::hash (omp_declare_variant_base_entry *x) +{ + inchash::hash hstate; + hstate.add_int (DECL_UID (x->base->decl)); + hstate.add_int (x->variants->length ()); + omp_declare_variant_entry *variant; + unsigned int i; + FOR_EACH_VEC_SAFE_ELT (x->variants, i, variant) + { + hstate.add_int (DECL_UID (variant->variant->decl)); + hstate.add_wide_int (variant->score); + hstate.add_wide_int (variant->score_in_declare_simd_clone); + hstate.add_ptr (variant->ctx); + hstate.add_int (variant->matches); + } + return hstate.end (); +} + +bool +omp_declare_variant_hasher::equal (omp_declare_variant_base_entry *x, + omp_declare_variant_base_entry *y) +{ + if (x->base != y->base + || x->variants->length () != y->variants->length ()) + return false; + omp_declare_variant_entry *variant; + unsigned int i; + FOR_EACH_VEC_SAFE_ELT (x->variants, i, variant) + if (variant->variant != (*y->variants)[i].variant + || variant->score != (*y->variants)[i].score + || (variant->score_in_declare_simd_clone + != (*y->variants)[i].score_in_declare_simd_clone) + || variant->ctx != (*y->variants)[i].ctx + || variant->matches != (*y->variants)[i].matches) + return false; + return true; +} + +static GTY(()) hash_table<omp_declare_variant_hasher> *omp_declare_variants; + +struct omp_declare_variant_alt_hasher + : ggc_ptr_hash<omp_declare_variant_base_entry> { + static hashval_t hash (omp_declare_variant_base_entry *); + static bool equal (omp_declare_variant_base_entry *, + omp_declare_variant_base_entry *); +}; + +hashval_t +omp_declare_variant_alt_hasher::hash (omp_declare_variant_base_entry *x) +{ + return DECL_UID (x->node->decl); +} + +bool 
+omp_declare_variant_alt_hasher::equal (omp_declare_variant_base_entry *x, + omp_declare_variant_base_entry *y) +{ + return x->node == y->node; +} + +static GTY(()) hash_table<omp_declare_variant_alt_hasher> + *omp_declare_variant_alt; + +/* Try to resolve declare variant after gimplification. */ + +static tree +omp_resolve_late_declare_variant (tree alt) +{ + cgraph_node *node = cgraph_node::get (alt); + cgraph_node *cur_node = cgraph_node::get (cfun->decl); + if (node == NULL + || !node->declare_variant_alt + || !cfun->after_inlining) + return alt; + + omp_declare_variant_base_entry entry; + entry.base = NULL; + entry.node = node; + entry.variants = NULL; + omp_declare_variant_base_entry *entryp + = omp_declare_variant_alt->find_with_hash (&entry, DECL_UID (alt)); + + unsigned int i, j; + omp_declare_variant_entry *varentry1, *varentry2; + auto_vec <bool, 16> matches; + unsigned int nmatches = 0; + FOR_EACH_VEC_SAFE_ELT (entryp->variants, i, varentry1) + { + if (varentry1->matches) + { + /* This has been checked to be ok already. */ + matches.safe_push (true); + nmatches++; + continue; + } + switch (omp_context_selector_matches (varentry1->ctx)) + { + case 0: + matches.safe_push (false); + break; + case -1: + return alt; + default: + matches.safe_push (true); + nmatches++; + break; + } + } + + if (nmatches == 0) + return entryp->base->decl; + + /* A context selector that is a strict subset of another context selector + has a score of zero. */ + FOR_EACH_VEC_SAFE_ELT (entryp->variants, i, varentry1) + if (matches[i]) + { + for (j = i + 1; + vec_safe_iterate (entryp->variants, j, &varentry2); ++j) + if (matches[j]) + { + int r = omp_context_selector_compare (varentry1->ctx, + varentry2->ctx); + if (r == -1) + { + /* ctx1 is a strict subset of ctx2, ignore ctx1. */ + matches[i] = false; + break; + } + else if (r == 1) + /* ctx2 is a strict subset of ctx1, remove ctx2. 
*/ + matches[j] = false; + } + } + + widest_int max_score = -1; + varentry2 = NULL; + FOR_EACH_VEC_SAFE_ELT (entryp->variants, i, varentry1) + if (matches[i]) + { + widest_int score + = (cur_node->simdclone ? varentry1->score_in_declare_simd_clone + : varentry1->score); + if (score > max_score) + { + max_score = score; + varentry2 = varentry1; + } + } + return varentry2->variant->decl; +} + +/* Hook to adjust hash tables on cgraph_node removal. */ + +static void +omp_declare_variant_remove_hook (struct cgraph_node *node, void *) +{ + if (!node->declare_variant_alt) + return; + + /* Drop this hash table completely. */ + omp_declare_variants = NULL; + /* And remove node from the other hash table. */ + if (omp_declare_variant_alt) + { + omp_declare_variant_base_entry entry; + entry.base = NULL; + entry.node = node; + entry.variants = NULL; + omp_declare_variant_alt->remove_elt_with_hash (&entry, + DECL_UID (node->decl)); + } +} + /* Try to resolve declare variant, return the variant decl if it should be used instead of base, or base otherwise. */ @@ -1422,6 +2074,9 @@ tree omp_resolve_declare_variant (tree base) { tree variant1 = NULL_TREE, variant2 = NULL_TREE; + if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0) + return omp_resolve_late_declare_variant (base); + auto_vec <tree, 16> variants; auto_vec <bool, 16> defer; bool any_deferred = false; @@ -1432,6 +2087,11 @@ omp_resolve_declare_variant (tree base) break; if (TREE_CODE (TREE_PURPOSE (TREE_VALUE (attr))) != FUNCTION_DECL) continue; + cgraph_node *node = cgraph_node::get (base); + /* If this is already a magic decl created by this function, + don't process it again. 
*/ + if (node && node->declare_variant_alt) + return base; switch (omp_context_selector_matches (TREE_VALUE (TREE_VALUE (attr)))) { case 0: @@ -1459,6 +2119,10 @@ omp_resolve_declare_variant (tree base) bool first = true; unsigned int i; tree attr1, attr2; + omp_declare_variant_base_entry entry; + entry.base = cgraph_node::get_create (base); + entry.node = NULL; + vec_alloc (entry.variants, variants.length ()); FOR_EACH_VEC_ELT (variants, i, attr1) { widest_int score1; @@ -1498,6 +2162,14 @@ omp_resolve_declare_variant (tree base) variant2 = defer[i] ? NULL_TREE : attr1; } } + omp_declare_variant_entry varentry; + varentry.variant + = cgraph_node::get_create (TREE_PURPOSE (TREE_VALUE (attr1))); + varentry.score = score1; + varentry.score_in_declare_simd_clone = score2; + varentry.ctx = ctx; + varentry.matches = !defer[i]; + entry.variants->quick_push (varentry); } /* If there is a clear winner variant with the score which is not @@ -1522,17 +2194,73 @@ omp_resolve_declare_variant (tree base) } } if (variant1) - return TREE_PURPOSE (TREE_VALUE (variant1)); + { + vec_free (entry.variants); + return TREE_PURPOSE (TREE_VALUE (variant1)); + } } - return base; + static struct cgraph_node_hook_list *node_removal_hook_holder; + if (!node_removal_hook_holder) + node_removal_hook_holder + = symtab->add_cgraph_removal_hook (omp_declare_variant_remove_hook, + NULL); + + if (omp_declare_variants == NULL) + omp_declare_variants + = hash_table<omp_declare_variant_hasher>::create_ggc (64); + omp_declare_variant_base_entry **slot + = omp_declare_variants->find_slot (&entry, INSERT); + if (*slot != NULL) + { + vec_free (entry.variants); + return (*slot)->node->decl; + } + + *slot = ggc_cleared_alloc<omp_declare_variant_base_entry> (); + (*slot)->base = entry.base; + (*slot)->node = entry.base; + (*slot)->variants = entry.variants; + tree alt = build_decl (DECL_SOURCE_LOCATION (base), FUNCTION_DECL, + DECL_NAME (base), TREE_TYPE (base)); + DECL_ARTIFICIAL (alt) = 1; + DECL_IGNORED_P 
(alt) = 1; + TREE_STATIC (alt) = 1; + tree attributes = DECL_ATTRIBUTES (base); + if (lookup_attribute ("noipa", attributes) == NULL) + { + attributes = tree_cons (get_identifier ("noipa"), NULL, attributes); + if (lookup_attribute ("noinline", attributes) == NULL) + attributes = tree_cons (get_identifier ("noinline"), NULL, + attributes); + if (lookup_attribute ("noclone", attributes) == NULL) + attributes = tree_cons (get_identifier ("noclone"), NULL, + attributes); + if (lookup_attribute ("no_icf", attributes) == NULL) + attributes = tree_cons (get_identifier ("no_icf"), NULL, + attributes); + } + DECL_ATTRIBUTES (alt) = attributes; + DECL_INITIAL (alt) = error_mark_node; + (*slot)->node = cgraph_node::create (alt); + (*slot)->node->declare_variant_alt = 1; + (*slot)->node->create_reference (entry.base, IPA_REF_ADDR); + omp_declare_variant_entry *varentry; + FOR_EACH_VEC_SAFE_ELT (entry.variants, i, varentry) + (*slot)->node->create_reference (varentry->variant, IPA_REF_ADDR); + if (omp_declare_variant_alt == NULL) + omp_declare_variant_alt + = hash_table<omp_declare_variant_alt_hasher>::create_ggc (64); + *omp_declare_variant_alt->find_slot_with_hash (*slot, DECL_UID (alt), + INSERT) = *slot; + return alt; } if (variants.length () == 1) return TREE_PURPOSE (TREE_VALUE (variants[0])); - /* A context selector that is a strict subset of another context selector has a score - of zero. */ + /* A context selector that is a strict subset of another context selector + has a score of zero. */ tree attr1, attr2; unsigned int i, j; FOR_EACH_VEC_ELT (variants, i, attr1) @@ -1948,3 +2676,5 @@ oacc_get_ifn_dim_arg (const gimple *stmt) gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX); return (int) axis; } + +#include "gt-omp-general.h" |