diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/omp-expand.c | 3 |
-rw-r--r-- | gcc/omp-general.c | 129 |
-rw-r--r-- | gcc/omp-general.h | 2 |
3 files changed, 130 insertions, 4 deletions
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c index c3b8820..a721940 100644 --- a/gcc/omp-expand.c +++ b/gcc/omp-expand.c @@ -2262,6 +2262,7 @@ expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi, && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node)) != CODE_FOR_nothing)) { + tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1; tree itype = TREE_TYPE (fd->loops[i].v); tree min_inner_iterations = fd->min_inner_iterations; tree factor = fd->factor; @@ -2384,7 +2385,7 @@ expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi, *gsi = gsi_after_labels (e->dest); t = fold_convert (itype, c); t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step); - t = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t); + t = fold_build2 (PLUS_EXPR, itype, outer_n1, t); t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true); diff --git a/gcc/omp-general.c b/gcc/omp-general.c index c6878cf..b2ce408 100644 --- a/gcc/omp-general.c +++ b/gcc/omp-general.c @@ -214,6 +214,7 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, fd->simd_schedule = false; fd->min_inner_iterations = NULL_TREE; fd->factor = NULL_TREE; + fd->adjn1 = NULL_TREE; collapse_iter = NULL; collapse_count = NULL; @@ -508,7 +509,10 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, continue; if (single_nonrect == -1 || (loop->m1 && TREE_CODE (loop->m1) != INTEGER_CST) - || (loop->m2 && TREE_CODE (loop->m2) != INTEGER_CST)) + || (loop->m2 && TREE_CODE (loop->m2) != INTEGER_CST) + || TREE_CODE (loop->n1) != INTEGER_CST + || TREE_CODE (loop->n2) != INTEGER_CST + || TREE_CODE (loop->step) != INTEGER_CST) { count = NULL_TREE; continue; @@ -574,12 +578,129 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, else if (t && t2 && integer_zerop (t) && integer_zerop (t2)) /* No iterations of the inner loop. 
count will be set to zero cst below. */; - else + else if (TYPE_UNSIGNED (itype) + || t == NULL_TREE + || t2 == NULL_TREE + || TREE_CODE (t) != INTEGER_CST + || TREE_CODE (t2) != INTEGER_CST) { /* Punt (for now). */ count = NULL_TREE; continue; } + else + { + /* Some iterations of the outer loop have zero iterations + of the inner loop, while others have at least one. + In this case, we need to adjust one of those outer + loop bounds. If ADJ_FIRST, we need to adjust outer n1 + (first), otherwise outer n2 (last). */ + bool adj_first = integer_zerop (t); + tree n1 = fold_convert (itype, loop->n1); + tree n2 = fold_convert (itype, loop->n2); + tree m1 = loop->m1 ? fold_convert (itype, loop->m1) + : build_zero_cst (itype); + tree m2 = loop->m2 ? fold_convert (itype, loop->m2) + : build_zero_cst (itype); + t = fold_binary (MINUS_EXPR, itype, n1, n2); + t2 = fold_binary (MINUS_EXPR, itype, m2, m1); + t = fold_binary (TRUNC_DIV_EXPR, itype, t, t2); + t2 = fold_binary (MINUS_EXPR, itype, t, first); + t2 = fold_binary (TRUNC_MOD_EXPR, itype, t2, ostep); + t = fold_binary (MINUS_EXPR, itype, t, t2); + tree n1cur + = fold_binary (PLUS_EXPR, itype, n1, + fold_binary (MULT_EXPR, itype, m1, t)); + tree n2cur + = fold_binary (PLUS_EXPR, itype, n2, + fold_binary (MULT_EXPR, itype, m2, t)); + t2 = fold_binary (loop->cond_code, boolean_type_node, + n1cur, n2cur); + tree t3 = fold_binary (MULT_EXPR, itype, m1, ostep); + tree t4 = fold_binary (MULT_EXPR, itype, m2, ostep); + tree diff; + if (adj_first) + { + tree new_first; + if (integer_nonzerop (t2)) + { + new_first = t; + n1first = n1cur; + n2first = n2cur; + if (flag_checking) + { + t3 = fold_binary (MINUS_EXPR, itype, n1cur, t3); + t4 = fold_binary (MINUS_EXPR, itype, n2cur, t4); + t3 = fold_binary (loop->cond_code, + boolean_type_node, t3, t4); + gcc_assert (integer_zerop (t3)); + } + } + else + { + t3 = fold_binary (PLUS_EXPR, itype, n1cur, t3); + t4 = fold_binary (PLUS_EXPR, itype, n2cur, t4); + new_first = fold_binary 
(PLUS_EXPR, itype, t, ostep); + n1first = t3; + n2first = t4; + if (flag_checking) + { + t3 = fold_binary (loop->cond_code, + boolean_type_node, t3, t4); + gcc_assert (integer_nonzerop (t3)); + } + } + diff = fold_binary (MINUS_EXPR, itype, new_first, first); + first = new_first; + fd->adjn1 = first; + } + else + { + tree new_last; + if (integer_zerop (t2)) + { + t3 = fold_binary (MINUS_EXPR, itype, n1cur, t3); + t4 = fold_binary (MINUS_EXPR, itype, n2cur, t4); + new_last = fold_binary (MINUS_EXPR, itype, t, ostep); + n1last = t3; + n2last = t4; + if (flag_checking) + { + t3 = fold_binary (loop->cond_code, + boolean_type_node, t3, t4); + gcc_assert (integer_nonzerop (t3)); + } + } + else + { + new_last = t; + n1last = n1cur; + n2last = n2cur; + if (flag_checking) + { + t3 = fold_binary (PLUS_EXPR, itype, n1cur, t3); + t4 = fold_binary (PLUS_EXPR, itype, n2cur, t4); + t3 = fold_binary (loop->cond_code, + boolean_type_node, t3, t4); + gcc_assert (integer_zerop (t3)); + } + } + diff = fold_binary (MINUS_EXPR, itype, last, new_last); + } + if (TYPE_UNSIGNED (itype) + && single_nonrect_cond_code == GT_EXPR) + diff = fold_binary (TRUNC_DIV_EXPR, itype, + fold_unary (NEGATE_EXPR, itype, diff), + fold_unary (NEGATE_EXPR, itype, + ostep)); + else + diff = fold_binary (TRUNC_DIV_EXPR, itype, diff, ostep); + diff = fold_convert (long_long_unsigned_type_node, diff); + single_nonrect_count + = fold_binary (MINUS_EXPR, long_long_unsigned_type_node, + single_nonrect_count, diff); + t = NULL_TREE; + } } else t = fold_binary (loop->cond_code, boolean_type_node, @@ -715,10 +836,11 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, *collapse_count = fold_convert_loc (loc, iter_type, count); if (fd->min_inner_iterations && fd->factor) { - t = make_tree_vec (3); + t = make_tree_vec (4); TREE_VEC_ELT (t, 0) = *collapse_count; TREE_VEC_ELT (t, 1) = fd->min_inner_iterations; TREE_VEC_ELT (t, 2) = fd->factor; + TREE_VEC_ELT (t, 3) = fd->adjn1; *collapse_count = t; } } @@ 
-736,6 +858,7 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd, gcc_assert (fd->non_rect); fd->min_inner_iterations = TREE_VEC_ELT (fd->loop.n2, 1); fd->factor = TREE_VEC_ELT (fd->loop.n2, 2); + fd->adjn1 = TREE_VEC_ELT (fd->loop.n2, 3); fd->loop.n2 = TREE_VEC_ELT (fd->loop.n2, 0); } fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1); diff --git a/gcc/omp-general.h b/gcc/omp-general.h index ec0f2a4..2da4d14 100644 --- a/gcc/omp-general.h +++ b/gcc/omp-general.h @@ -85,6 +85,8 @@ struct omp_for_data outer iterator, depending on which results in fewer iterations. */ tree factor; /* (m2 - m1) * outer_step / inner_step. */ + /* Adjusted n1 of the outer loop in such loop nests (if needed). */ + tree adjn1; }; #define OACC_FN_ATTRIB "oacc function" |