aboutsummaryrefslogtreecommitdiff
path: root/gcc/omp-expand.c
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2020-10-07 10:49:37 +0200
committer Jakub Jelinek <jakub@redhat.com> 2020-10-07 10:49:37 +0200
commit 83f565ed4f37e550e1d40f7b6cf0b5845f29a9c7 (patch)
tree bb7bbda573e19900da9e4c94d79b4bdfa77773af /gcc/omp-expand.c
parent ebc77ce3a4c70730b4e38d68f88693eadbdc8712 (diff)
download gcc-83f565ed4f37e550e1d40f7b6cf0b5845f29a9c7.zip
gcc-83f565ed4f37e550e1d40f7b6cf0b5845f29a9c7.tar.gz
gcc-83f565ed4f37e550e1d40f7b6cf0b5845f29a9c7.tar.bz2
openmp: Improve composite simd vectorization
> > I was really hoping bbs 4 and 5 would be one loop (the one I set safelen
> > and force_vectorize etc. for) and that basic blocks 6 and 7 would be
> > together with that inner loop another loop, but apparently loop discovery
> > thinks it is just one loop.
> > Any ideas what I'm doing wrong or is there any way how to make it two loops
> > (that would also survive all the cfg cleanups until vectorization)?
>
> The early CFG looks like we have a common header with two latches
> so it boils down to how we disambiguate those in the end (we seem
> to unify the latches via a forwarder).  IIRC OMP lowering builds
> loops itself, could it not do the appropriate disambiguation itself?

I realized I emit the same stmts on both paths (before goto doit; and before
falling through it), at least the MIN_EXPR and PLUS_EXPR, so by forcing there
an extra bb which does those two and having the "doit" label before that
the innermost loop doesn't have multiple latches anymore and so is vectorized
fine.

2020-10-07  Jakub Jelinek  <jakub@redhat.com>

	* omp-expand.c (expand_omp_simd): Don't emit MIN_EXPR and PLUS_EXPR
	at the end of entry_bb and innermost init_bb, instead force arguments
	for MIN_EXPR into temporaries in both cases and jump to a new bb that
	performs MIN_EXPR and PLUS_EXPR.

	* gcc.dg/gomp/simd-2.c: New test.
	* gcc.dg/gomp/simd-3.c: New test.
Diffstat (limited to 'gcc/omp-expand.c')
-rw-r--r-- gcc/omp-expand.c 19
1 file changed, 15 insertions, 4 deletions
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index 99cb4f9..0d30089 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -6347,6 +6347,7 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
tree n2var = NULL_TREE;
tree n2v = NULL_TREE;
tree *nonrect_bounds = NULL;
+ tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
if (fd->collapse > 1)
{
if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
@@ -6406,9 +6407,10 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
fold_convert (itype, fd->loops[i].step));
t = fold_convert (type, t);
tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
- t = fold_build2 (MIN_EXPR, type, t2, t);
- t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
- expand_omp_build_assign (&gsi, n2var, t);
+ min_arg1 = create_tmp_var (type);
+ expand_omp_build_assign (&gsi, min_arg1, t2);
+ min_arg2 = create_tmp_var (type);
+ expand_omp_build_assign (&gsi, min_arg2, t);
}
else
{
@@ -6815,7 +6817,16 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
}
else
t = counts[i + 1];
- t = fold_build2 (MIN_EXPR, type, t2, t);
+ expand_omp_build_assign (&gsi, min_arg1, t2);
+ expand_omp_build_assign (&gsi, min_arg2, t);
+ e = split_block (init_bb, last_stmt (init_bb));
+ gsi = gsi_after_labels (e->dest);
+ init_bb = e->dest;
+ remove_edge (FALLTHRU_EDGE (entry_bb));
+ make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
+ set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
+ set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
+ t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
expand_omp_build_assign (&gsi, n2var, t);
}