aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Yuri Rumyantsev <ysrumyan@gmail.com> 2016-11-16 16:22:39 +0000
committer H.J. Lu <hjl@gcc.gnu.org> 2016-11-16 08:22:39 -0800
commit 598eaaa2a2368bb7d5ac3bafe7a0d1bb26d43f6e (patch)
tree dc23b763501a2bee6d35f9e53240208b4bacc573
parent 03b85dcd4861611b49a7e7bf737246b6460b2295 (diff)
downloadgcc-598eaaa2a2368bb7d5ac3bafe7a0d1bb26d43f6e.zip
gcc-598eaaa2a2368bb7d5ac3bafe7a0d1bb26d43f6e.tar.gz
gcc-598eaaa2a2368bb7d5ac3bafe7a0d1bb26d43f6e.tar.bz2
Support non-masked epilogue vectorization
gcc/ 2016-11-16 Yuri Rumyantsev <ysrumyan@gmail.com> * params.def (PARAM_VECT_EPILOGUES_NOMASK): New. * tree-if-conv.c (tree_if_conversion): Make public. * tree-if-conv.h: New file. * tree-vect-data-refs.c (vect_analyze_data_ref_dependences): Avoid dynamic alias checks for epilogues. * tree-vect-loop-manip.c (vect_do_peeling): Return created epilog. * tree-vect-loop.c: Include tree-if-conv.h. (new_loop_vec_info): Add zeroing orig_loop_info field. (vect_analyze_loop_2): Don't try to enhance alignment for epilogues. (vect_analyze_loop): Add argument ORIG_LOOP_VINFO which is not NULL if epilogue is vectorized, set up orig_loop_info field of loop_vinfo using passed argument. (vect_transform_loop): Check if created epilogue should be returned for further vectorization with less vf. If-convert epilogue if required. Print vectorization success for epilogue. * tree-vectorizer.c (vectorize_loops): Add epilogue vectorization if it is required, pass loop_vinfo produced during vectorization of loop body to vect_analyze_loop. * tree-vectorizer.h (struct _loop_vec_info): Add new field orig_loop_info. (LOOP_VINFO_ORIG_LOOP_INFO): New. (LOOP_VINFO_EPILOGUE_P): New. (LOOP_VINFO_ORIG_VECT_FACTOR): New. (vect_do_peeling): Change prototype to return epilogue. (vect_analyze_loop): Add argument of loop_vec_info type. (vect_transform_loop): Return created loop. gcc/testsuite/ 2016-11-16 Yuri Rumyantsev <ysrumyan@gmail.com> * lib/target-supports.exp (check_avx2_hw_available): New. (check_effective_target_avx2_runtime): New. * gcc.dg/vect/vect-tail-nomask-1.c: New test. From-SVN: r242501
-rw-r--r--gcc/ChangeLog29
-rw-r--r--gcc/params.def5
-rw-r--r--gcc/testsuite/ChangeLog6
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c106
-rw-r--r--gcc/testsuite/lib/target-supports.exp41
-rw-r--r--gcc/tree-if-conv.c2
-rw-r--r--gcc/tree-if-conv.h24
-rw-r--r--gcc/tree-vect-data-refs.c12
-rw-r--r--gcc/tree-vect-loop-manip.c10
-rw-r--r--gcc/tree-vect-loop.c102
-rw-r--r--gcc/tree-vectorizer.c17
-rw-r--r--gcc/tree-vectorizer.h19
12 files changed, 340 insertions, 33 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c3b99ed..3a48f13 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,32 @@
+2016-11-16 Yuri Rumyantsev <ysrumyan@gmail.com>
+
+ * params.def (PARAM_VECT_EPILOGUES_NOMASK): New.
+ * tree-if-conv.c (tree_if_conversion): Make public.
+ * tree-if-conv.h: New file.
+ * tree-vect-data-refs.c (vect_analyze_data_ref_dependences): Avoid
+ dynamic alias checks for epilogues.
+ * tree-vect-loop-manip.c (vect_do_peeling): Return created epilog.
+ * tree-vect-loop.c: Include tree-if-conv.h.
+ (new_loop_vec_info): Add zeroing orig_loop_info field.
+ (vect_analyze_loop_2): Don't try to enhance alignment for epilogues.
+ (vect_analyze_loop): Add argument ORIG_LOOP_VINFO which is not NULL
+ if epilogue is vectorized, set up orig_loop_info field of loop_vinfo
+ using passed argument.
+ (vect_transform_loop): Check if created epilogue should be returned
+ for further vectorization with less vf. If-convert epilogue if
+ required. Print vectorization success for epilogue.
+ * tree-vectorizer.c (vectorize_loops): Add epilogue vectorization
+ if it is required, pass loop_vinfo produced during vectorization of
+ loop body to vect_analyze_loop.
+ * tree-vectorizer.h (struct _loop_vec_info): Add new field
+ orig_loop_info.
+ (LOOP_VINFO_ORIG_LOOP_INFO): New.
+ (LOOP_VINFO_EPILOGUE_P): New.
+ (LOOP_VINFO_ORIG_VECT_FACTOR): New.
+ (vect_do_peeling): Change prototype to return epilogue.
+ (vect_analyze_loop): Add argument of loop_vec_info type.
+ (vect_transform_loop): Return created loop.
+
2016-11-16 Segher Boessenkool <segher@kernel.crashing.org>
* config/rs6000/rs6000.c (rs6000_components_for_bb): Mark the LR
diff --git a/gcc/params.def b/gcc/params.def
index 89f7093..50f75a7 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1270,6 +1270,11 @@ DEFPARAM (PARAM_MAX_VRP_SWITCH_ASSERTIONS,
"edge of a switch statement during VRP",
10, 0, 0)
+DEFPARAM (PARAM_VECT_EPILOGUES_NOMASK,
+ "vect-epilogues-nomask",
+ "Enable loop epilogue vectorization using smaller vector size.",
+ 0, 0, 1)
+
/*
Local variables:
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6edf715..2d03675 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-11-16 Yuri Rumyantsev <ysrumyan@gmail.com>
+
+ * lib/target-supports.exp (check_avx2_hw_available): New.
+ (check_effective_target_avx2_runtime): New.
+ * gcc.dg/vect/vect-tail-nomask-1.c: New test.
+
2016-11-16 Tamar Christina <tamar.christina@arm.com>
PR testsuite/78136
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
new file mode 100644
index 0000000..dc016bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
@@ -0,0 +1,106 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "--param vect-epilogues-nomask=1 -mavx2" { target avx2_runtime } } */
+
+#define SIZE 1023
+#define ALIGN 64
+
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+void __attribute__((noinline))
+test_citer (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c)
+{
+ int i;
+
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (int *)__builtin_assume_aligned (b, ALIGN);
+ c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+ for (i = 0; i < SIZE; i++)
+ c[i] = a[i] + b[i];
+}
+
+void __attribute__((noinline))
+test_viter (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c,
+ int size)
+{
+ int i;
+
+ a = (int *)__builtin_assume_aligned (a, ALIGN);
+ b = (int *)__builtin_assume_aligned (b, ALIGN);
+ c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+ for (i = 0; i < size; i++)
+ c[i] = a[i] + b[i];
+}
+
+void __attribute__((noinline))
+init_data (int * __restrict__ a,
+ int * __restrict__ b,
+ int * __restrict__ c,
+ int size)
+{
+ for (int i = 0; i < size; i++)
+ {
+ a[i] = i;
+ b[i] = -i;
+ c[i] = 0;
+ asm volatile("": : :"memory");
+ }
+ a[size] = b[size] = c[size] = size;
+}
+
+
+void __attribute__((noinline))
+run_test ()
+{
+ int *a;
+ int *b;
+ int *c;
+ int i;
+
+ if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+ if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+ return;
+
+ init_data (a, b, c, SIZE);
+ test_citer (a, b, c);
+ for (i = 0; i < SIZE; i++)
+ if (c[i] != a[i] + b[i])
+ __builtin_abort ();
+ if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+ __builtin_abort ();
+
+ init_data (a, b, c, SIZE);
+ test_viter (a, b, c, SIZE);
+ for (i = 0; i < SIZE; i++)
+ if (c[i] != a[i] + b[i])
+ __builtin_abort ();
+ if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+ __builtin_abort ();
+
+ free (a);
+ free (b);
+ free (c);
+}
+
+int
+main (int argc, const char **argv)
+{
+ if (!posix_memalign)
+ return 0;
+
+ run_test ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index b683c09..e62b768 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1730,6 +1730,36 @@ proc check_avx_hw_available { } {
}]
}
+# Return 1 if the target supports executing AVX2 instructions, 0
+# otherwise. Cache the result.
+
+proc check_avx2_hw_available { } {
+ return [check_cached_effective_target avx2_hw_available {
+ # If this is not the right target then we can skip the test.
+ if { !([istarget x86_64-*-*] || [istarget i?86-*-*]) } {
+ expr 0
+ } else {
+ check_runtime_nocache avx2_hw_available {
+ #include "cpuid.h"
+ int main ()
+ {
+ unsigned int eax, ebx, ecx, edx;
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+ || ((ecx & bit_OSXSAVE) != bit_OSXSAVE))
+ return 1;
+
+ if (__get_cpuid_max (0, NULL) < 7)
+ return 1;
+
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+
+ return (ebx & bit_AVX2) != bit_AVX2;
+ }
+ } ""
+ }
+ }]
+}
+
# Return 1 if the target supports running SSE executables, 0 otherwise.
proc check_effective_target_sse_runtime { } {
@@ -1805,6 +1835,17 @@ proc check_effective_target_avx_runtime { } {
return 0
}
+# Return 1 if the target supports running AVX2 executables, 0 otherwise.
+
+proc check_effective_target_avx2_runtime { } {
+ if { [check_effective_target_avx2]
+ && [check_avx2_hw_available]
+ && [check_avx_os_support_available] } {
+ return 1
+ }
+ return 0
+}
+
# Return 1 if we are compiling for 64-bit PowerPC but we do not use direct
# move instructions for moves from GPR to FPR.
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
index 0a20189..0b86ffe 100644
--- a/gcc/tree-if-conv.c
+++ b/gcc/tree-if-conv.c
@@ -2734,7 +2734,7 @@ ifcvt_local_dce (basic_block bb)
profitability analysis. Returns non-zero todo flags when something
changed. */
-static unsigned int
+unsigned int
tree_if_conversion (struct loop *loop)
{
unsigned int todo = 0;
diff --git a/gcc/tree-if-conv.h b/gcc/tree-if-conv.h
new file mode 100644
index 0000000..3a732c2
--- /dev/null
+++ b/gcc/tree-if-conv.h
@@ -0,0 +1,24 @@
+/* Copyright (C) 2016 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_TREE_IF_CONV_H
+#define GCC_TREE_IF_CONV_H
+
+unsigned int tree_if_conversion (struct loop *);
+
+#endif /* GCC_TREE_IF_CONV_H */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 220dc30..5a30314 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -480,9 +480,15 @@ vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf)
LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
return false;
- FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
- if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
- return false;
+ /* For epilogues we either have no aliases or alias versioning
+ was applied to original loop. Therefore we may just get max_vf
+ using VF of original loop. */
+ if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+ *max_vf = LOOP_VINFO_ORIG_VECT_FACTOR (loop_vinfo);
+ else
+ FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
+ if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
+ return false;
return true;
}
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 4c6b8c7..e13d6a2 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1614,11 +1614,13 @@ slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
Note this function peels prolog and epilog only if it's necessary,
as well as guards.
+ Returns created epilogue or NULL.
TODO: Guard for prefer_scalar_loop should be emitted along with
versioning conditions if loop versioning is needed. */
-void
+
+struct loop *
vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
tree *niters_vector, int th, bool check_profitability,
bool niters_no_overflow)
@@ -1634,7 +1636,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
if (!prolog_peeling && !epilog_peeling)
- return;
+ return NULL;
prob_vector = 9 * REG_BR_PROB_BASE / 10;
if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
@@ -1642,7 +1644,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- struct loop *prolog, *epilog, *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
struct loop *first_loop = loop;
create_lcssa_for_virtual_phi (loop);
update_ssa (TODO_update_ssa_only_virtuals);
@@ -1824,6 +1826,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
}
adjust_vec.release ();
free_original_copy_tables ();
+
+ return epilog;
}
/* Function vect_create_cond_for_niters_checks.
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 53570f3..4150b0d 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3. If not see
#include "gimple-fold.h"
#include "cgraph.h"
#include "tree-cfg.h"
+#include "tree-if-conv.h"
/* Loop Vectorization Pass.
@@ -1171,6 +1172,7 @@ new_loop_vec_info (struct loop *loop)
LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
LOOP_VINFO_PEELING_FOR_NITER (res) = false;
LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
+ LOOP_VINFO_ORIG_LOOP_INFO (res) = NULL;
return res;
}
@@ -2046,15 +2048,20 @@ start_over:
if (!ok)
return false;
- /* This pass will decide on using loop versioning and/or loop peeling in
- order to enhance the alignment of data references in the loop. */
- ok = vect_enhance_data_refs_alignment (loop_vinfo);
- if (!ok)
+ /* Do not invoke vect_enhance_data_refs_alignment for epilogue
+ vectorization. */
+ if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "bad data alignment.\n");
- return false;
+ /* This pass will decide on using loop versioning and/or loop peeling in
+ order to enhance the alignment of data references in the loop. */
+ ok = vect_enhance_data_refs_alignment (loop_vinfo);
+ if (!ok)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "bad data alignment.\n");
+ return false;
+ }
}
if (slp)
@@ -2308,9 +2315,10 @@ again:
Apply a set of analyses on LOOP, and create a loop_vec_info struct
for it. The different analyses will record information in the
- loop_vec_info struct. */
+ loop_vec_info struct. If ORIG_LOOP_VINFO is not NULL epilogue must
+ be vectorized. */
loop_vec_info
-vect_analyze_loop (struct loop *loop)
+vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo)
{
loop_vec_info loop_vinfo;
unsigned int vector_sizes;
@@ -2346,6 +2354,10 @@ vect_analyze_loop (struct loop *loop)
}
bool fatal = false;
+
+ if (orig_loop_vinfo)
+ LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo;
+
if (vect_analyze_loop_2 (loop_vinfo, fatal))
{
LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
@@ -6696,12 +6708,14 @@ loop_niters_no_overflow (loop_vec_info loop_vinfo)
The analysis phase has determined that the loop is vectorizable.
Vectorize the loop - created vectorized stmts to replace the scalar
- stmts in the loop, and update the loop exit condition. */
+ stmts in the loop, and update the loop exit condition.
+ Returns scalar epilogue loop if any. */
-void
+struct loop *
vect_transform_loop (loop_vec_info loop_vinfo)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ struct loop *epilogue = NULL;
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes;
int i;
@@ -6780,8 +6794,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters;
tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo));
bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo);
- vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th,
- check_profitability, niters_no_overflow);
+ epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th,
+ check_profitability, niters_no_overflow);
if (niters_vector == NULL_TREE)
{
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
@@ -7065,12 +7079,19 @@ vect_transform_loop (loop_vec_info loop_vinfo)
if (dump_enabled_p ())
{
- dump_printf_loc (MSG_NOTE, vect_location,
- "LOOP VECTORIZED\n");
- if (loop->inner)
+ if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "LOOP VECTORIZED\n");
+ if (loop->inner)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "OUTER LOOP VECTORIZED\n");
+ dump_printf (MSG_NOTE, "\n");
+ }
+ else
dump_printf_loc (MSG_NOTE, vect_location,
- "OUTER LOOP VECTORIZED\n");
- dump_printf (MSG_NOTE, "\n");
+ "LOOP EPILOGUE VECTORIZED (VS=%d)\n",
+ current_vector_size);
}
/* Free SLP instances here because otherwise stmt reference counting
@@ -7082,6 +7103,49 @@ vect_transform_loop (loop_vec_info loop_vinfo)
/* Clear-up safelen field since its value is invalid after vectorization
since vectorized loop can have loop-carried dependencies. */
loop->safelen = 0;
+
+ /* Don't vectorize epilogue for epilogue. */
+ if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+ epilogue = NULL;
+
+ if (epilogue)
+ {
+ unsigned int vector_sizes
+ = targetm.vectorize.autovectorize_vector_sizes ();
+ vector_sizes &= current_vector_size - 1;
+
+ if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK))
+ epilogue = NULL;
+ else if (!vector_sizes)
+ epilogue = NULL;
+ else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
+ {
+ int smallest_vec_size = 1 << ctz_hwi (vector_sizes);
+ int ratio = current_vector_size / smallest_vec_size;
+ int eiters = LOOP_VINFO_INT_NITERS (loop_vinfo)
+ - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ eiters = eiters % vf;
+
+ epilogue->nb_iterations_upper_bound = eiters - 1;
+
+ if (eiters < vf / ratio)
+ epilogue = NULL;
+ }
+ }
+
+ if (epilogue)
+ {
+ epilogue->force_vectorize = loop->force_vectorize;
+ epilogue->safelen = loop->safelen;
+ epilogue->dont_vectorize = false;
+
+ /* We may need to if-convert epilogue to vectorize it. */
+ if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
+ tree_if_conversion (epilogue);
+ }
+
+ return epilogue;
}
/* The code below is trying to perform simple optimization - revert
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 22e587a..35d7a3e 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -514,6 +514,7 @@ vectorize_loops (void)
hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
bool any_ifcvt_loops = false;
unsigned ret = 0;
+ struct loop *new_loop;
vect_loops_num = number_of_loops (cfun);
@@ -538,7 +539,8 @@ vectorize_loops (void)
&& optimize_loop_nest_for_speed_p (loop))
|| loop->force_vectorize)
{
- loop_vec_info loop_vinfo;
+ loop_vec_info loop_vinfo, orig_loop_vinfo = NULL;
+vectorize_epilogue:
vect_location = find_loop_location (loop);
if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
&& dump_enabled_p ())
@@ -546,7 +548,7 @@ vectorize_loops (void)
LOCATION_FILE (vect_location),
LOCATION_LINE (vect_location));
- loop_vinfo = vect_analyze_loop (loop);
+ loop_vinfo = vect_analyze_loop (loop, orig_loop_vinfo);
loop->aux = loop_vinfo;
if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
@@ -580,7 +582,7 @@ vectorize_loops (void)
&& dump_enabled_p ())
dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
"loop vectorized\n");
- vect_transform_loop (loop_vinfo);
+ new_loop = vect_transform_loop (loop_vinfo);
num_vectorized_loops++;
/* Now that the loop has been vectorized, allow it to be unrolled
etc. */
@@ -602,6 +604,15 @@ vectorize_loops (void)
fold_loop_vectorized_call (loop_vectorized_call, boolean_true_node);
ret |= TODO_cleanup_cfg;
}
+
+ if (new_loop)
+ {
+ /* Epilogue of vectorized loop must be vectorized too. */
+ vect_loops_num = number_of_loops (cfun);
+ loop = new_loop;
+ orig_loop_vinfo = loop_vinfo; /* To pass vect_analyze_loop. */
+ goto vectorize_epilogue;
+ }
}
vect_location = UNKNOWN_LOCATION;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 2a7cdfe..2a7fa0a 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -335,6 +335,10 @@ typedef struct _loop_vec_info : public vec_info {
/* Mark loops having masked stores. */
bool has_mask_store;
+ /* For loops being epilogues of already vectorized loops
+ this points to the original vectorized loop. Otherwise NULL. */
+ _loop_vec_info *orig_loop_info;
+
} *loop_vec_info;
/* Access Functions. */
@@ -374,6 +378,7 @@ typedef struct _loop_vec_info : public vec_info {
#define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store
#define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
#define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost
+#define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
((L)->may_misalign_stmts.length () > 0)
@@ -389,6 +394,12 @@ typedef struct _loop_vec_info : public vec_info {
#define LOOP_VINFO_NITERS_KNOWN_P(L) \
(tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
+#define LOOP_VINFO_EPILOGUE_P(L) \
+ (LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL)
+
+#define LOOP_VINFO_ORIG_VECT_FACTOR(L) \
+ (LOOP_VINFO_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L)))
+
static inline loop_vec_info
loop_vec_info_for_loop (struct loop *loop)
{
@@ -1032,8 +1043,8 @@ extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *,
struct loop *, edge);
extern void vect_loop_versioning (loop_vec_info, unsigned int, bool);
-extern void vect_do_peeling (loop_vec_info, tree, tree,
- tree *, int, bool, bool);
+extern struct loop *vect_do_peeling (loop_vec_info, tree, tree,
+ tree *, int, bool, bool);
extern source_location find_loop_location (struct loop *);
extern bool vect_can_advance_ivs_p (loop_vec_info);
@@ -1144,11 +1155,11 @@ extern void destroy_loop_vec_info (loop_vec_info, bool);
extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, bool,
bool *, bool);
/* Drive for loop analysis stage. */
-extern loop_vec_info vect_analyze_loop (struct loop *);
+extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info);
extern tree vect_build_loop_niters (loop_vec_info);
extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, bool);
/* Drive for loop transformation stage. */
-extern void vect_transform_loop (loop_vec_info);
+extern struct loop *vect_transform_loop (loop_vec_info);
extern loop_vec_info vect_analyze_loop_form (struct loop *);
extern bool vectorizable_live_operation (gimple *, gimple_stmt_iterator *,
slp_tree, int, gimple **);