aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog14
-rw-r--r--gcc/testsuite/ChangeLog7
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c4
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ldist-32.c29
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ldist-35.c28
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c28
-rw-r--r--gcc/tree-loop-distribution.c123
-rw-r--r--gcc/tree-ssa-loop-ivopts.c5
-rw-r--r--gcc/tree-ssa-loop-ivopts.h1
9 files changed, 232 insertions, 7 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1eee85d..8e056f2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,19 @@
2017-10-18 Bin Cheng <bin.cheng@arm.com>
+ * tree-loop-distribution.c (INCLUDE_ALGORITHM): New header file.
+ (tree-ssa-loop-ivopts.h): New header file.
+ (struct builtin_info): New fields.
+ (classify_builtin_1): Compute and record base and offset parts for
+ memset builtin partition by calling strip_offset.
+ (offset_cmp, fuse_memset_builtins): New functions.
+ (finalize_partitions): Fuse adjacent memset partitions by calling
+ above function.
+ * tree-ssa-loop-ivopts.c (strip_offset): Delete static declaration.
+ Expose the interface.
+ * tree-ssa-loop-ivopts.h (strip_offset): New declaration.
+
+2017-10-18 Bin Cheng <bin.cheng@arm.com>
+
PR tree-optimization/82574
* tree-loop-distribution.c (find_single_drs): New parameter. Check
that data reference must be executed exactly once per iteration
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d642a6a..ee05077e 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,12 @@
2017-10-18 Bin Cheng <bin.cheng@arm.com>
+ * gcc.dg/tree-ssa/ldist-17.c: Adjust test string.
+ * gcc.dg/tree-ssa/ldist-32.c: New test.
+ * gcc.dg/tree-ssa/ldist-35.c: New test.
+ * gcc.dg/tree-ssa/ldist-36.c: New test.
+
+2017-10-18 Bin Cheng <bin.cheng@arm.com>
+
PR tree-optimization/82574
* gcc.dg/tree-ssa/pr82574.c: New test.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c
index 4efc0a4..b3617f6 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-17.c
@@ -45,5 +45,5 @@ mad_synth_mute (struct mad_synth *synth)
return;
}
-/* { dg-final { scan-tree-dump "distributed: split to 0 loops and 4 library calls" "ldist" } } */
-/* { dg-final { scan-tree-dump-times "generated memset zero" 4 "ldist" } } */
+/* { dg-final { scan-tree-dump "Loop nest . distributed: split to 0 loops and 1 library calls" "ldist" } } */
+/* { dg-final { scan-tree-dump-times "generated memset zero" 1 "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-32.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-32.c
new file mode 100644
index 0000000..477d222
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-32.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
+
+#define M (256)
+#define N (512)
+
+struct st
+{
+ int a[M][N];
+ int c[M];
+ int b[M][N];
+};
+
+void
+foo (struct st *p)
+{
+ for (unsigned i = 0; i < M; ++i)
+ {
+ p->c[i] = 0;
+ for (unsigned j = N; j > 0; --j)
+ {
+ p->a[i][j - 1] = 0;
+ p->b[i][j - 1] = 0;
+ }
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 0 loops and 1 library" 1 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_memset \\(.*, 0, 1049600\\);" 1 "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-35.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-35.c
new file mode 100644
index 0000000..445d23d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-35.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
+
+#define M (256)
+#define N (512)
+
+struct st
+{
+ int a[M][N];
+ int c[M];
+ int b[M][N];
+};
+
+void
+foo (struct st * restrict p, struct st * restrict q)
+{
+ for (unsigned i = 0; i < M; ++i)
+ {
+ p->c[i] = 0;
+ for (unsigned j = N; j > 0; --j)
+ {
+ p->a[i][j - 1] = q->a[i][j - 1];
+ p->b[i][j - 1] = 0;
+ }
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 0 loops and 1 library" 1 "ldist" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c
new file mode 100644
index 0000000..0e843f4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
+
+#define M (256)
+#define N (512)
+
+struct st
+{
+ int a[M][N];
+ int c[M];
+ int b[M][N];
+};
+
+void
+foo (struct st * restrict p)
+{
+ for (unsigned i = 0; i < M; ++i)
+ {
+ p->c[i] = 0;
+ for (unsigned j = N; j > 0; --j)
+ {
+ p->b[i][j - 1] = p->a[i][j - 1];
+ p->a[i][j - 1] = 0;
+ }
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 0 loops and 3 library" 1 "ldist" } } */
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 6abb7e4..52db3c9 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see
data reuse. */
#include "config.h"
+#define INCLUDE_ALGORITHM /* stable_sort */
#include "system.h"
#include "coretypes.h"
#include "backend.h"
@@ -106,6 +107,7 @@ along with GCC; see the file COPYING3. If not see
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
@@ -604,6 +606,10 @@ struct builtin_info
tree dst_base;
tree src_base;
tree size;
+ /* Base and offset part of dst_base after stripping constant offset. This
+ is only used in memset builtin distribution for now. */
+ tree dst_base_base;
+ unsigned HOST_WIDE_INT dst_base_offset;
};
/* Partition for loop distribution. */
@@ -1504,7 +1510,11 @@ classify_builtin_st (loop_p loop, partition *partition, data_reference_p dr)
if (!compute_access_range (loop, dr, &base, &size))
return;
- partition->builtin = alloc_builtin (dr, NULL, base, NULL_TREE, size);
+ struct builtin_info *builtin;
+ builtin = alloc_builtin (dr, NULL, base, NULL_TREE, size);
+ builtin->dst_base_base = strip_offset (builtin->dst_base,
+ &builtin->dst_base_offset);
+ partition->builtin = builtin;
partition->kind = PKIND_MEMSET;
}
@@ -2480,6 +2490,113 @@ version_for_distribution_p (vec<struct partition *> *partitions,
return (alias_ddrs->length () > 0);
}
+/* Compare base offset of builtin mem* partitions P1 and P2. */
+
+static bool
+offset_cmp (struct partition *p1, struct partition *p2)
+{
+ gcc_assert (p1 != NULL && p1->builtin != NULL);
+ gcc_assert (p2 != NULL && p2->builtin != NULL);
+ return p1->builtin->dst_base_offset < p2->builtin->dst_base_offset;
+}
+
+/* Fuse adjacent memset builtin PARTITIONS if possible. This is a special
+ case optimization transforming below code:
+
+ __builtin_memset (&obj, 0, 100);
+ _1 = &obj + 100;
+ __builtin_memset (_1, 0, 200);
+ _2 = &obj + 300;
+ __builtin_memset (_2, 0, 100);
+
+ into:
+
+ __builtin_memset (&obj, 0, 400);
+
+ Note we don't have dependence information between different partitions
+ at this point, as a result, we can't handle nonadjacent memset builtin
+ partitions since dependence might be broken. */
+
+static void
+fuse_memset_builtins (vec<struct partition *> *partitions)
+{
+ unsigned i, j;
+ struct partition *part1, *part2;
+
+ for (i = 0; partitions->iterate (i, &part1);)
+ {
+ if (part1->kind != PKIND_MEMSET)
+ {
+ i++;
+ continue;
+ }
+
+ /* Find sub-array of memset builtins of the same base. Index range
+ of the sub-array is [i, j) with "j > i". */
+ for (j = i + 1; partitions->iterate (j, &part2); ++j)
+ {
+ if (part2->kind != PKIND_MEMSET
+ || !operand_equal_p (part1->builtin->dst_base_base,
+ part2->builtin->dst_base_base, 0))
+ break;
+ }
+
+ /* Stable sort is required in order to avoid breaking dependence. */
+ std::stable_sort (&(*partitions)[i],
+ &(*partitions)[i] + j - i, offset_cmp);
+ /* Continue with next partition. */
+ i = j;
+ }
+
+ /* Merge all consecutive memset builtin partitions. */
+ for (i = 0; i < partitions->length () - 1;)
+ {
+ part1 = (*partitions)[i];
+ if (part1->kind != PKIND_MEMSET)
+ {
+ i++;
+ continue;
+ }
+
+ part2 = (*partitions)[i + 1];
+ /* Only merge memset partitions of the same base and with constant
+ access sizes. */
+ if (part2->kind != PKIND_MEMSET
+ || TREE_CODE (part1->builtin->size) != INTEGER_CST
+ || TREE_CODE (part2->builtin->size) != INTEGER_CST
+ || !operand_equal_p (part1->builtin->dst_base_base,
+ part2->builtin->dst_base_base, 0))
+ {
+ i++;
+ continue;
+ }
+ tree rhs1 = gimple_assign_rhs1 (DR_STMT (part1->builtin->dst_dr));
+ tree rhs2 = gimple_assign_rhs1 (DR_STMT (part2->builtin->dst_dr));
+ int bytev1 = const_with_all_bytes_same (rhs1);
+ int bytev2 = const_with_all_bytes_same (rhs2);
+ /* Only merge memset partitions of the same value. */
+ if (bytev1 != bytev2 || bytev1 == -1)
+ {
+ i++;
+ continue;
+ }
+ wide_int end1 = wi::add (part1->builtin->dst_base_offset,
+ wi::to_wide (part1->builtin->size));
+ /* Only merge adjacent memset partitions. */
+ if (wi::ne_p (end1, part2->builtin->dst_base_offset))
+ {
+ i++;
+ continue;
+ }
+ /* Merge partitions[i] and partitions[i+1]. */
+ part1->builtin->size = fold_build2 (PLUS_EXPR, sizetype,
+ part1->builtin->size,
+ part2->builtin->size);
+ partition_free (part2);
+ partitions->ordered_remove (i + 1);
+ }
+}
+
/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
ALIAS_DDRS contains ddrs which need runtime alias check. */
@@ -2523,6 +2640,10 @@ finalize_partitions (struct loop *loop, vec<struct partition *> *partitions,
}
partitions->truncate (1);
}
+
+ /* Fuse memset builtins if possible. */
+ if (partitions->length () > 1)
+ fuse_memset_builtins (partitions);
}
/* Distributes the code from LOOP in such a way that producer statements
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 737e393..793e66f 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -1550,9 +1550,6 @@ record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
-static tree
-strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
-
/* Record a group of TYPE. */
static struct iv_group *
@@ -2863,7 +2860,7 @@ strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
/* Strips constant offsets from EXPR and stores them to OFFSET. */
-static tree
+tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
HOST_WIDE_INT off;
diff --git a/gcc/tree-ssa-loop-ivopts.h b/gcc/tree-ssa-loop-ivopts.h
index f8f31e9..bd92051 100644
--- a/gcc/tree-ssa-loop-ivopts.h
+++ b/gcc/tree-ssa-loop-ivopts.h
@@ -28,6 +28,7 @@ extern void dump_cand (FILE *, struct iv_cand *);
extern bool contains_abnormal_ssa_name_p (tree);
extern struct loop *outermost_invariant_loop_for_expr (struct loop *, tree);
extern bool expr_invariant_in_loop_p (struct loop *, tree);
+extern tree strip_offset (tree, unsigned HOST_WIDE_INT *);
bool may_be_nonaddressable_p (tree expr);
void tree_ssa_iv_optimize (void);