aboutsummaryrefslogtreecommitdiff
path: root/gcc/omp-general.c
diff options
context:
space:
mode:
author: Jakub Jelinek <jakub@redhat.com> 2020-07-09 12:07:17 +0200
committer: Jakub Jelinek <jakub@redhat.com> 2020-07-09 12:07:17 +0200
commit: 5acef69f9d3d9f3c537b5e5157519edf02f86c4d (patch)
tree: af18107dc1e787b46c735b2eea3fad74d6b091a2 /gcc/omp-general.c
parent: ea82325afeccf3604f393916832eaadcbe1225bd (diff)
download: gcc-5acef69f9d3d9f3c537b5e5157519edf02f86c4d.zip
gcc-5acef69f9d3d9f3c537b5e5157519edf02f86c4d.tar.gz
gcc-5acef69f9d3d9f3c537b5e5157519edf02f86c4d.tar.bz2
openmp: Optimize triangular loop logical iterator to actual iterators computation using search for quadratic equation root(s)
This patch implements the optimized computation of the actual iterators from the logical iterator for triangular loops.

I have a rough implementation using integers, but this one uses floating point. There is a small problem: -fopenmp programs aren't linked with -lm, so the optimization is applied only if the hardware has a sqrt optab (and it uses an internal function (ifn) rather than __builtin_sqrt, because it obviously doesn't need errno handling etc.).

Do you think it is OK this way, or should I use the integer computation with an inlined isqrt? In that case we have an inequality of the form
  start >= x * t10 + t11 * (((x - 1) * x) / 2)
where t10 and t11 are signed long long values and start is unsigned long long. The division by 2 is actually a problem for accuracy in some cases, so if we do it with integer arithmetic, we actually need to compute
  long long t12 = 2 * t10 - t11;
  unsigned long long t13 = t12 * t12 + start * 8 * t11;
  unsigned long long isqrt_ = isqrtull (t13);
  long long x = (((long long) isqrt_ - t12) / t11) >> 1;
with careful overflow checking on all the computations before isqrtull (and on overflow, use the fallback implementation).

2020-07-09  Jakub Jelinek  <jakub@redhat.com>

* omp-general.h (struct omp_for_data): Add min_inner_iterations and factor members.
* omp-general.c (omp_extract_for_data): Initialize them and remember them in OMP_CLAUSE_COLLAPSE_COUNT if needed and restore from there.
* omp-expand.c (expand_omp_for_init_counts): Fix up computation of counts[fd->last_nonrect] if fd->loop.n2 is INTEGER_CST.
(expand_omp_for_init_vars): For fd->first_nonrect + 1 == fd->last_nonrect loops with for now INTEGER_CST fd->loop.n2 find quadratic equation roots instead of using fallback method when possible.
* testsuite/libgomp.c/loop-19.c: New test.
* testsuite/libgomp.c/loop-20.c: New test.
Diffstat (limited to 'gcc/omp-general.c')
-rw-r--r-- gcc/omp-general.c 23
1 files changed, 22 insertions, 1 deletions
diff --git a/gcc/omp-general.c b/gcc/omp-general.c
index 2a47466..c6878cf 100644
--- a/gcc/omp-general.c
+++ b/gcc/omp-general.c
@@ -212,6 +212,8 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
fd->sched_modifiers = 0;
fd->chunk_size = NULL_TREE;
fd->simd_schedule = false;
+ fd->min_inner_iterations = NULL_TREE;
+ fd->factor = NULL_TREE;
collapse_iter = NULL;
collapse_count = NULL;
@@ -653,6 +655,8 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
else
t2 = fold_build2 (TRUNC_DIV_EXPR, itype, t2, step);
t2 = fold_convert (llutype, t2);
+ fd->min_inner_iterations = t;
+ fd->factor = t2;
t = fold_build2 (MULT_EXPR, llutype, t,
single_nonrect_count);
tree t3 = fold_build2 (MINUS_EXPR, llutype,
@@ -707,7 +711,17 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
if (collapse_count && *collapse_count == NULL)
{
if (count)
- *collapse_count = fold_convert_loc (loc, iter_type, count);
+ {
+ *collapse_count = fold_convert_loc (loc, iter_type, count);
+ if (fd->min_inner_iterations && fd->factor)
+ {
+ t = make_tree_vec (3);
+ TREE_VEC_ELT (t, 0) = *collapse_count;
+ TREE_VEC_ELT (t, 1) = fd->min_inner_iterations;
+ TREE_VEC_ELT (t, 2) = fd->factor;
+ *collapse_count = t;
+ }
+ }
else
*collapse_count = create_tmp_var (iter_type, ".count");
}
@@ -717,6 +731,13 @@ omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
fd->loop.v = *collapse_iter;
fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0);
fd->loop.n2 = *collapse_count;
+ if (TREE_CODE (fd->loop.n2) == TREE_VEC)
+ {
+ gcc_assert (fd->non_rect);
+ fd->min_inner_iterations = TREE_VEC_ELT (fd->loop.n2, 1);
+ fd->factor = TREE_VEC_ELT (fd->loop.n2, 2);
+ fd->loop.n2 = TREE_VEC_ELT (fd->loop.n2, 0);
+ }
fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1);
fd->loop.m1 = NULL_TREE;
fd->loop.m2 = NULL_TREE;