aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- gcc/ChangeLog 9
-rw-r--r-- gcc/testsuite/ChangeLog 7
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/pr85720.c 13
-rw-r--r-- gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c 2
-rw-r--r-- gcc/tree-loop-distribution.c 40
5 files changed, 61 insertions, 10 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 06e6591..68a4754 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,14 @@
2018-05-25 Bin Cheng <bin.cheng@arm.com>
+ PR tree-optimization/85720
+ * tree-loop-distribution.c (break_alias_scc_partitions): Don't merge
+ SCC if all partitions are builtins.
+ (version_loop_by_alias_check): New parameter. Don't generate cancelable
+ runtime alias check if all partitions are builtins.
+ (distribute_loop): Update call to above function.
+
+2018-05-25 Bin Cheng <bin.cheng@arm.com>
+
* tree-outof-ssa.c (tree-ssa.h, tree-dfa.h): Include header files.
(create_default_def, for_all_parms): Moved from tree-ssa-coalesce.c.
(parm_default_def_partition_arg): Ditto.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 92dc334..7d7fc8c 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2018-05-25 Bin Cheng <bin.cheng@arm.com>
+
+ PR tree-optimization/85720
+ * gcc.dg/tree-ssa/pr85720.c: New test.
+ * gcc.target/i386/avx256-unaligned-store-2.c: Disable loop pattern
+ distribution.
+
2018-05-25 Martin Liska <mliska@suse.cz>
PR testsuite/85911
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr85720.c b/gcc/testsuite/gcc.dg/tree-ssa/pr85720.c
new file mode 100644
index 0000000..18d8be9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr85720.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target size32plus } } */
+/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist" } */
+
+void fill(char* A, char* B, unsigned n)
+{
+ for (unsigned i = 0; i < n; i++)
+ {
+ A[i] = 0;
+ B[i] = A[i] + 1;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "_builtin_memset" 2 "ldist" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
index 87285c6..1e7969b 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O3 -mtune-ctrl=sse_typeless_stores -dp -mavx -mavx256-split-unaligned-store -mno-prefer-avx128" } */
+/* { dg-options "-O3 -mtune-ctrl=sse_typeless_stores -dp -mavx -mavx256-split-unaligned-store -mno-prefer-avx128 -fno-tree-loop-distribute-patterns" } */
#define N 1024
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 5e327f4..c6e0a60 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -2268,21 +2268,26 @@ break_alias_scc_partitions (struct graph *rdg,
for (j = 0; partitions->iterate (j, &first); ++j)
if (pg->vertices[j].component == i)
break;
+
+ bool same_type = true, all_builtins = partition_builtin_p (first);
for (++j; partitions->iterate (j, &partition); ++j)
{
if (pg->vertices[j].component != i)
continue;
- /* Note we Merge partitions of parallel type on purpose, though
- the result partition is sequential. The reason is vectorizer
- can do more accurate runtime alias check in this case. Also
- it results in more conservative distribution. */
if (first->type != partition->type)
{
- bitmap_clear_bit (sccs_to_merge, i);
+ same_type = false;
break;
}
+ all_builtins &= partition_builtin_p (partition);
}
+ /* Merge SCC if all partitions in SCC have the same type, though the
+ result partition is sequential, because vectorizer can do better
+ runtime alias check. One exception is when all partitions in SCC are
+ builtins. */
+ if (!same_type || all_builtins)
+ bitmap_clear_bit (sccs_to_merge, i);
}
/* Initialize callback data for traversing. */
@@ -2458,7 +2463,8 @@ compute_alias_check_pairs (struct loop *loop, vec<ddr_p> *alias_ddrs,
checks and version LOOP under condition of these runtime alias checks. */
static void
-version_loop_by_alias_check (struct loop *loop, vec<ddr_p> *alias_ddrs)
+version_loop_by_alias_check (vec<struct partition *> *partitions,
+ struct loop *loop, vec<ddr_p> *alias_ddrs)
{
profile_probability prob;
basic_block cond_bb;
@@ -2481,9 +2487,25 @@ version_loop_by_alias_check (struct loop *loop, vec<ddr_p> *alias_ddrs)
is_gimple_val, NULL_TREE);
/* Depend on vectorizer to fold IFN_LOOP_DIST_ALIAS. */
- if (flag_tree_loop_vectorize)
+ bool cancelable_p = flag_tree_loop_vectorize;
+ if (cancelable_p)
+ {
+ unsigned i = 0;
+ struct partition *partition;
+ for (; partitions->iterate (i, &partition); ++i)
+ if (!partition_builtin_p (partition))
+ break;
+
+ /* If all partitions are builtins, distributing the loop would be
+ profitable and we don't want to cancel the runtime alias checks. */
+ if (i == partitions->length ())
+ cancelable_p = false;
+ }
+
+ /* Generate internal function call for loop distribution alias check if the
+ runtime alias check should be cancelable. */
+ if (cancelable_p)
{
- /* Generate internal function call for loop distribution alias check. */
call_stmt = gimple_build_call_internal (IFN_LOOP_DIST_ALIAS,
2, NULL_TREE, cond_expr);
lhs = make_ssa_name (boolean_type_node);
@@ -2883,7 +2905,7 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
}
if (version_for_distribution_p (&partitions, &alias_ddrs))
- version_loop_by_alias_check (loop, &alias_ddrs);
+ version_loop_by_alias_check (&partitions, loop, &alias_ddrs);
if (dump_file && (dump_flags & TDF_DETAILS))
{