author     Jakub Jelinek <jakub@redhat.com>   2020-07-09 12:07:17 +0200
committer  Jakub Jelinek <jakub@redhat.com>   2020-07-09 12:07:17 +0200
commit     5acef69f9d3d9f3c537b5e5157519edf02f86c4d (patch)
tree       af18107dc1e787b46c735b2eea3fad74d6b091a2 /gcc/omp-expand.c
parent     ea82325afeccf3604f393916832eaadcbe1225bd (diff)
openmp: Optimize triangular loop logical iterator to actual iterators computation using search for quadratic equation root(s)
This patch implements the optimized logical to actual iterators computation
for triangular loops.  I have a rough implementation using integers, but this
one uses floating point.  There is a small problem that -fopenmp programs
aren't linked with -lm, so it does it only if the hw has a sqrt optab (and it
uses the ifn rather than __builtin_sqrt because it obviously doesn't need
errno handling etc.).  Do you think it is ok this way, or should I use the
integral computation using an inlined isqrt?  (We have an inequality of the
form
  start >= x * t10 + t11 * (((x - 1) * x) / 2)
where t10 and t11 are signed long long values and start is unsigned long
long, and the division by 2 actually is a problem for accuracy in some cases,
so if we do it in integral arithmetic, we need to do
  long long t12 = 2 * t10 - t11;
  unsigned long long t13 = t12 * t12 + start * 8 * t11;
  unsigned long long isqrt_ = isqrtull (t13);
  long long x = (((long long) isqrt_ - t12) / t11) >> 1;
with careful overflow checking on all the computations before isqrtull, and
on overflows use the fallback implementation.)

2020-07-09  Jakub Jelinek  <jakub@redhat.com>

        * omp-general.h (struct omp_for_data): Add min_inner_iterations
        and factor members.
        * omp-general.c (omp_extract_for_data): Initialize them and
        remember them in OMP_CLAUSE_COLLAPSE_COUNT if needed and restore
        from there.
        * omp-expand.c (expand_omp_for_init_counts): Fix up computation
        of counts[fd->last_nonrect] if fd->loop.n2 is INTEGER_CST.
        (expand_omp_for_init_vars): For fd->first_nonrect + 1
        == fd->last_nonrect loops with (for now) INTEGER_CST fd->loop.n2,
        find quadratic equation roots instead of using the fallback
        method when possible.
        * testsuite/libgomp.c/loop-19.c: New test.
        * testsuite/libgomp.c/loop-20.c: New test.
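As a reading aid for the hunk below, here is a minimal host-side C sketch of
the mapping the emitted GIMPLE performs, under my reading of the patch; it is
not code from the patch, and map_logical_to_iters, min_inner and factor are
illustrative stand-ins for fd->min_inner_iterations and fd->factor.

/* Illustrative sketch only, not code from the patch.  Target loop shape:

     #pragma omp for collapse(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < i; j++)
         ...

   The runtime hands each thread a range of logical iteration numbers; the
   generated code has to map a logical number START back to the outer
   iteration C and the inner offset.  The inner loop of outer iteration X
   runs MIN_INNER + X * FACTOR times, so the first X outer iterations
   consume d(X) = X * MIN_INNER + FACTOR * X * (X - 1) / 2 logical
   iterations, and C is the truncated root of d(X) = START.  */

#include <math.h>
#include <stdbool.h>

static bool
map_logical_to_iters (unsigned long long start,
                      long long min_inner,   /* fd->min_inner_iterations */
                      long long factor,      /* fd->factor */
                      unsigned long long *outer,
                      unsigned long long *inner)
{
  if (factor == 0)
    return false;  /* Constant inner trip count; the patch handles this
                      with a plain division/modulo instead.  */

  /* Quadratic formula for d(X) = START, evaluated in double just like
     the emitted code.  */
  double t3 = (double) min_inner - (double) factor / 2.0;
  double disc = 2.0 * (double) factor * (double) start + t3 * t3;
  if (disc < 0.0)
    return false;  /* Mirrors the < 0 guard before IFN_SQRT.  */

  unsigned long long c
    = (unsigned long long) ((sqrt (disc) - t3) / (double) factor);

  /* Logical iterations consumed by the first C outer iterations,
     recomputed exactly in unsigned arithmetic.  */
  unsigned long long d
    = (unsigned long long) factor * (c * (c - 1) / 2)
      + c * (unsigned long long) min_inner;

  /* If floating-point imprecision placed START outside outer iteration C,
     give up; the emitted code branches to the slow counting fallback.  */
  if (start < d
      || start >= d + ((unsigned long long) factor * c
                       + (unsigned long long) min_inner))
    return false;

  *outer = c;          /* Outer iterator becomes n1 + c * step.  */
  *inner = start - d;  /* Inner iterator becomes n1' + (start - d) * step'.  */
  return true;
}

The two range checks at the end appear to correspond to the GE_EXPR
conditions guarding bb2 and bb3 in the hunk, whose unlikely edges lead to
the fallback path.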
Diffstat (limited to 'gcc/omp-expand.c')
-rw-r--r--  gcc/omp-expand.c | 211
1 file changed, 205 insertions(+), 6 deletions(-)
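For comparison, here is a hedged sketch of the integral alternative raised in
the commit message above; none of it is in the patch (which emits IFN_SQRT on
double), isqrtull is a hypothetical helper, and the overflow checks the
message says would be required are omitted.

/* Hedged sketch of the integral variant discussed in the commit message.  */

static unsigned long long
isqrtull (unsigned long long n)
{
  /* Plain Newton iteration for floor (sqrt (n)); adequate for a sketch.  */
  if (n < 2)
    return n;
  unsigned long long x = (n >> 1) + 1, y = (x + n / x) >> 1;
  while (y < x)
    {
      x = y;
      y = (x + n / x) >> 1;
    }
  return x;
}

/* Largest x with start >= x * t10 + t11 * (((x - 1) * x) / 2), i.e. the
   outer iteration containing logical iteration START, where t10 is the
   inner trip count of the first outer iteration and t11 its per outer
   iteration increment.  Uses exactly the expressions from the message;
   real code would need overflow checks before isqrtull and would fall
   back on overflow.  */

static long long
outer_iteration (unsigned long long start, long long t10, long long t11)
{
  long long t12 = 2 * t10 - t11;
  unsigned long long t13 = (unsigned long long) t12 * t12
                           + start * 8 * (unsigned long long) t11;
  unsigned long long isqrt_ = isqrtull (t13);
  return (((long long) isqrt_ - t12) / t11) >> 1;
}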
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index b1349d8..c3b8820 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -2137,7 +2137,7 @@ expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
int non_rect_referenced = 0, non_rect = 0;
for (i = 0; i < fd->collapse; i++)
{
- if ((i < fd->first_nonrect || fd->last_nonrect)
+ if ((i < fd->first_nonrect || i > fd->last_nonrect)
&& !integer_zerop (counts[i]))
t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
if (fd->loops[i].non_rect_referenced)
@@ -2249,17 +2249,208 @@ expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
t = tem;
if (i == fd->last_nonrect)
{
- /* Fallback implementation. Evaluate the loops in between
- (inclusive) fd->first_nonrect and fd->last_nonrect at
- runtime using temporaries instead of the original iteration
- variables, in the body just bump the counter and compare
- with the desired value. */
t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
tree stopval = t;
tree idx = create_tmp_reg (type, ".count");
expand_omp_build_assign (gsi, idx,
build_zero_cst (type), true);
+ basic_block bb_triang = NULL;
+ if (fd->first_nonrect + 1 == fd->last_nonrect
+ /* For now. */
+ && TREE_CODE (fd->loop.n2) == INTEGER_CST
+ && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
+ != CODE_FOR_nothing))
+ {
+ tree itype = TREE_TYPE (fd->loops[i].v);
+ tree min_inner_iterations = fd->min_inner_iterations;
+ tree factor = fd->factor;
+ gcond *cond_stmt
+ = gimple_build_cond (NE_EXPR, factor,
+ build_zero_cst (TREE_TYPE (factor)),
+ NULL_TREE, NULL_TREE);
+ gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
+ edge e = split_block (gsi_bb (*gsi), cond_stmt);
+ basic_block bb0 = e->src;
+ e->flags = EDGE_TRUE_VALUE;
+ e->probability = profile_probability::likely ();
+ *gsi = gsi_after_labels (e->dest);
+ tree slltype = long_long_integer_type_node;
+ tree ulltype = long_long_unsigned_type_node;
+ tree stopvalull = fold_convert (ulltype, stopval);
+ stopvalull
+ = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ min_inner_iterations
+ = fold_convert (slltype, min_inner_iterations);
+ min_inner_iterations
+ = force_gimple_operand_gsi (gsi, min_inner_iterations, true,
+ NULL_TREE, false,
+ GSI_CONTINUE_LINKING);
+ factor = fold_convert (slltype, factor);
+ factor
+ = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ tree min_inner_iterationsd
+ = fold_build1 (FLOAT_EXPR, double_type_node,
+ min_inner_iterations);
+ min_inner_iterationsd
+ = force_gimple_operand_gsi (gsi, min_inner_iterationsd, true,
+ NULL_TREE, false,
+ GSI_CONTINUE_LINKING);
+ tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
+ factor);
+ factord = force_gimple_operand_gsi (gsi, factord, true,
+ NULL_TREE, false,
+ GSI_CONTINUE_LINKING);
+ tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
+ stopvalull);
+ stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
+ NULL_TREE, false,
+ GSI_CONTINUE_LINKING);
+ /* Temporarily disable flag_rounding_math, values will be
+ decimal numbers divided by 2 and worst case imprecisions
+ due to too large values ought to be caught later by the
+ checks for fallback. */
+ int save_flag_rounding_math = flag_rounding_math;
+ flag_rounding_math = 0;
+ t = fold_build2 (RDIV_EXPR, double_type_node, factord,
+ build_real (double_type_node, dconst2));
+ tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
+ min_inner_iterationsd, t);
+ t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
+ GSI_CONTINUE_LINKING);
+ t = fold_build2 (MULT_EXPR, double_type_node, factord,
+ build_real (double_type_node, dconst2));
+ t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
+ t = fold_build2 (PLUS_EXPR, double_type_node, t,
+ fold_build2 (MULT_EXPR, double_type_node,
+ t3, t3));
+ flag_rounding_math = save_flag_rounding_math;
+ t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
+ GSI_CONTINUE_LINKING);
+ cond_stmt
+ = gimple_build_cond (LT_EXPR, t,
+ build_zero_cst (double_type_node),
+ NULL_TREE, NULL_TREE);
+ gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
+ e = split_block (gsi_bb (*gsi), cond_stmt);
+ basic_block bb1 = e->src;
+ e->flags = EDGE_FALSE_VALUE;
+ e->probability = profile_probability::very_likely ();
+ *gsi = gsi_after_labels (e->dest);
+ gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
+ tree sqrtr = create_tmp_var (double_type_node);
+ gimple_call_set_lhs (call, sqrtr);
+ gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
+ t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
+ t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
+ t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
+ tree c = create_tmp_var (ulltype);
+ tree d = create_tmp_var (ulltype);
+ expand_omp_build_assign (gsi, c, t, true);
+ t = fold_build2 (MINUS_EXPR, ulltype, c,
+ build_one_cst (ulltype));
+ t = fold_build2 (MULT_EXPR, ulltype, c, t);
+ t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
+ t = fold_build2 (MULT_EXPR, ulltype, fd->factor, t);
+ tree t2 = fold_build2 (MULT_EXPR, ulltype, c,
+ fd->min_inner_iterations);
+ t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
+ expand_omp_build_assign (gsi, d, t, true);
+ t = fold_build2 (MULT_EXPR, ulltype, fd->factor, c);
+ t = fold_build2 (PLUS_EXPR, ulltype,
+ t, fd->min_inner_iterations);
+ t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
+ GSI_CONTINUE_LINKING);
+ cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
+ NULL_TREE, NULL_TREE);
+ gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
+ e = split_block (gsi_bb (*gsi), cond_stmt);
+ basic_block bb2 = e->src;
+ e->flags = EDGE_TRUE_VALUE;
+ e->probability = profile_probability::very_likely ();
+ *gsi = gsi_after_labels (e->dest);
+ t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
+ t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
+ GSI_CONTINUE_LINKING);
+ cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
+ NULL_TREE, NULL_TREE);
+ gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
+ e = split_block (gsi_bb (*gsi), cond_stmt);
+ basic_block bb3 = e->src;
+ e->flags = EDGE_FALSE_VALUE;
+ e->probability = profile_probability::very_likely ();
+ *gsi = gsi_after_labels (e->dest);
+ t = fold_convert (itype, c);
+ t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
+ t = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t);
+ t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
+ GSI_CONTINUE_LINKING);
+ expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
+ t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
+ t2 = fold_convert (itype, t2);
+ t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
+ t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
+ if (fd->loops[i].m1)
+ {
+ t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
+ t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
+ }
+ expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
+ e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
+ bb_triang = e->src;
+ *gsi = gsi_after_labels (e->dest);
+ remove_edge (e);
+ e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
+ e->probability = profile_probability::very_unlikely ();
+ e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
+ e->probability = profile_probability::very_unlikely ();
+ e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
+ e->probability = profile_probability::very_unlikely ();
+
+ basic_block bb4 = create_empty_bb (bb0);
+ add_bb_to_loop (bb4, bb0->loop_father);
+ e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
+ e->probability = profile_probability::unlikely ();
+ make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
+ set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
+ set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
+ gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
+ t2 = fold_build2 (TRUNC_DIV_EXPR, type,
+ counts[i], counts[i - 1]);
+ t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
+ GSI_CONTINUE_LINKING);
+ t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
+ t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
+ t = fold_convert (itype, t);
+ t2 = fold_convert (itype, t2);
+ t = fold_build2 (MULT_EXPR, itype, t,
+ fold_convert (itype, fd->loops[i].step));
+ t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
+ t2 = fold_build2 (MULT_EXPR, itype, t2,
+ fold_convert (itype, fd->loops[i - 1].step));
+ t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
+ t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
+ gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
+ if (fd->loops[i].m1)
+ {
+ t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
+ fd->loops[i - 1].v);
+ t = fold_build2 (PLUS_EXPR, itype, t, t2);
+ }
+ t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
+ false, GSI_CONTINUE_LINKING);
+ stmt = gimple_build_assign (fd->loops[i].v, t);
+ gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
+ }
+ /* Fallback implementation. Evaluate the loops in between
+ (inclusive) fd->first_nonrect and fd->last_nonrect at
+ runtime using temporaries instead of the original iteration
+ variables, in the body just bump the counter and compare
+ with the desired value. */
gimple_stmt_iterator gsi2 = *gsi;
basic_block entry_bb = gsi_bb (gsi2);
edge e = split_block (entry_bb, gsi_stmt (gsi2));
@@ -2455,6 +2646,14 @@ expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
*gsi = gsi_last_bb (gsi_bb (*gsi));
else
gsi_prev (gsi);
+ if (bb_triang)
+ {
+ e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
+ make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
+ *gsi = gsi_after_labels (e->dest);
+ if (!gsi_end_p (*gsi))
+ gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
+ }
}
else
{