aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2015-11-27 14:17:28 +0000
committer Richard Biener <rguenth@gcc.gnu.org> 2015-11-27 14:17:28 +0000
commit 72c0f64330a0a5500fe97bf829ce181a28820fdf (patch)
tree 7b174d0e3a941e415681d1d52723e50d8079280a /gcc
parent 00e5241831c1227615a45b7bcba29c393671cb3f (diff)
download gcc-72c0f64330a0a5500fe97bf829ce181a28820fdf.zip
gcc-72c0f64330a0a5500fe97bf829ce181a28820fdf.tar.gz
gcc-72c0f64330a0a5500fe97bf829ce181a28820fdf.tar.bz2
re PR tree-optimization/68559 (Excessive peeling for gaps)
2015-11-27 Richard Biener <rguenther@suse.de>

 PR tree-optimization/68559
 * tree-vect-data-refs.c (vect_analyze_group_access_1): Move
 peeling for gap checks ...
 * tree-vect-stmts.c (vectorizable_load): ... here and relax
 for SLP.
 * tree-vect-loop.c (vect_analyze_loop_2): Re-set
 LOOP_VINFO_PEELING_FOR_GAPS before re-trying without SLP.

 * gcc.dg/vect/slp-perm-4.c: Adjust again.
 * gcc.dg/vect/pr45752.c: Likewise.

From-SVN: r231015
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 10
-rw-r--r-- gcc/testsuite/ChangeLog 6
-rw-r--r-- gcc/testsuite/gcc.dg/vect/pr45752.c 11
-rw-r--r-- gcc/testsuite/gcc.dg/vect/slp-perm-4.c 8
-rw-r--r-- gcc/tree-vect-data-refs.c 45
-rw-r--r-- gcc/tree-vect-loop.c 1
-rw-r--r-- gcc/tree-vect-stmts.c 40
7 files changed, 59 insertions, 62 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ca9635b..505c693 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2015-11-27 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/68559
+ * tree-vect-data-refs.c (vect_analyze_group_access_1): Move
+ peeling for gap checks ...
+ * tree-vect-stmts.c (vectorizable_load): ... here and relax
+ for SLP.
+ * tree-vect-loop.c (vect_analyze_loop_2): Re-set
+ LOOP_VINFO_PEELING_FOR_GAPS before re-trying without SLP.
+
2015-11-27 Nathan Sidwell <nathan@acm.org>
* config/nvptx/nvptx-protos.h (nvptx_record_needed_decl): Don't
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b5b837d..d58666e 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2015-11-27 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/68559
+ * gcc.dg/vect/slp-perm-4.c: Adjust again.
+ * gcc.dg/vect/pr45752.c: Likewise.
+
2015-11-27 Jakub Jelinek <jakub@redhat.com>
PR rtl-optimization/68250
diff --git a/gcc/testsuite/gcc.dg/vect/pr45752.c b/gcc/testsuite/gcc.dg/vect/pr45752.c
index ab95ad6..0736a74 100644
--- a/gcc/testsuite/gcc.dg/vect/pr45752.c
+++ b/gcc/testsuite/gcc.dg/vect/pr45752.c
@@ -33,7 +33,7 @@
#define M34 7716
#define M44 16
-#define N 40
+#define N 20
void foo (unsigned int *__restrict__ pInput,
unsigned int *__restrict__ pOutput,
@@ -77,14 +77,10 @@ int main (int argc, const char* argv[])
unsigned int input[N], output[N], i, input2[N], output2[N];
unsigned int check_results[N]
= {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399,
- 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619,
- 42488, 15014, 587164, 257979, 41229, 52308, 18434, 726764, 313554, 50839,
- 62128, 21854, 866364, 369129, 60449, 71948, 25274, 1005964, 424704, 70059};
+ 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619 };
unsigned int check_results2[N]
= {7136, 2702, 84604, 57909, 6633, 16956, 6122, 224204, 113484, 16243,
- 26776, 9542, 363804, 169059, 25853, 36596, 12962, 503404, 224634, 35463,
- 46416, 16382, 643004, 280209, 45073, 56236, 19802, 782604, 335784, 54683,
- 66056, 23222, 922204, 391359, 64293, 75876, 26642, 1061804, 446934, 73903};
+ 26776, 9542, 363804, 169059, 25853, 36596, 12962, 503404, 224634, 35463 };
check_vect ();
@@ -108,4 +104,5 @@ int main (int argc, const char* argv[])
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "gaps requires scalar epilogue loop" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-4.c b/gcc/testsuite/gcc.dg/vect/slp-perm-4.c
index 8e1b5d4..80bc58c 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-4.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-4.c
@@ -33,7 +33,7 @@
#define M34 7716
#define M44 16
-#define N 40
+#define N 20
void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput)
{
@@ -60,9 +60,7 @@ int main (int argc, const char* argv[])
unsigned int input[N], output[N], i;
unsigned int check_results[N]
= {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399,
- 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619,
- 42488, 15014, 587164, 257979, 41229, 52308, 18434, 726764, 313554, 50839,
- 62128, 21854, 866364, 369129, 60449, 71948, 25274, 1005964, 424704, 70059};
+ 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619};
check_vect ();
@@ -85,5 +83,5 @@ int main (int argc, const char* argv[])
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "gaps requires scalar epilogue loop" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
-
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 62e61e0..7962e36 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -2166,10 +2166,6 @@ vect_analyze_group_access_1 (struct data_reference *dr)
HOST_WIDE_INT dr_step = -1;
HOST_WIDE_INT groupsize, last_accessed_element = 1;
bool slp_impossible = false;
- struct loop *loop = NULL;
-
- if (loop_vinfo)
- loop = LOOP_VINFO_LOOP (loop_vinfo);
/* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
size of the interleaving group (including gaps). */
@@ -2227,24 +2223,6 @@ vect_analyze_group_access_1 (struct data_reference *dr)
dump_printf (MSG_NOTE, "\n");
}
- if (loop_vinfo)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "Data access with gaps requires scalar "
- "epilogue loop\n");
- if (loop->inner)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Peeling for outer loop is not"
- " supported\n");
- return false;
- }
-
- LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
- }
-
return true;
}
@@ -2399,29 +2377,6 @@ vect_analyze_group_access_1 (struct data_reference *dr)
if (bb_vinfo)
BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
}
-
- /* If there is a gap in the end of the group or the group size cannot
- be made a multiple of the vector element count then we access excess
- elements in the last iteration and thus need to peel that off. */
- if (loop_vinfo
- && (groupsize - last_accessed_element > 0
- || exact_log2 (groupsize) == -1))
-
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Data access with gaps requires scalar "
- "epilogue loop\n");
- if (loop->inner)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Peeling for outer loop is not supported\n");
- return false;
- }
-
- LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
- }
}
return true;
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 6719c9a..7d1f555 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2190,6 +2190,7 @@ again:
= init_cost (LOOP_VINFO_LOOP (loop_vinfo));
/* Reset assorted flags. */
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
+ LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
goto start_over;
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 687f982..3b078da 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -6246,15 +6246,45 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
that leaves unused vector loads around punt - we at least create
very sub-optimal code in that case (and blow up memory,
see PR65518). */
+ bool force_peeling = false;
if (first_stmt == stmt
- && !GROUP_NEXT_ELEMENT (stmt_info)
- && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
+ && !GROUP_NEXT_ELEMENT (stmt_info))
+ {
+ if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "single-element interleaving not supported "
+ "for not adjacent vector loads\n");
+ return false;
+ }
+
+ /* Single-element interleaving requires peeling for gaps. */
+ force_peeling = true;
+ }
+
+ /* If there is a gap in the end of the group or the group size cannot
+ be made a multiple of the vector element count then we access excess
+ elements in the last iteration and thus need to peel that off. */
+ if (loop_vinfo
+ && ! STMT_VINFO_STRIDED_P (stmt_info)
+ && (force_peeling
+ || GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
+ || (!slp && vf % GROUP_SIZE (vinfo_for_stmt (first_stmt)) != 0)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "single-element interleaving not supported "
- "for not adjacent vector loads\n");
- return false;
+ "Data access with gaps requires scalar "
+ "epilogue loop\n");
+ if (loop->inner)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Peeling for outer loop is not supported\n");
+ return false;
+ }
+
+ LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
}
if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())