diff options
author | Andre Vieira <andre.simoesdiasvieira@arm.com> | 2022-01-19 14:11:32 +0000 |
---|---|---|
committer | Andre Vieira <andre.simoesdiasvieira@arm.com> | 2022-01-19 14:14:47 +0000 |
commit | f4ca0a53be18dfc7162fd5dcc1e73c4203805e14 (patch) | |
tree | e97006f3292a4830ced2c6aadff610c147e38a21 /gcc | |
parent | ffc7f200adbdf47f14b3594d9b21855c19cf797a (diff) | |
download | gcc-f4ca0a53be18dfc7162fd5dcc1e73c4203805e14.zip gcc-f4ca0a53be18dfc7162fd5dcc1e73c4203805e14.tar.gz gcc-f4ca0a53be18dfc7162fd5dcc1e73c4203805e14.tar.bz2 |
vect: Fix epilogue mode skipping
gcc/ChangeLog:
PR tree-optimization/103997
* tree-vect-loop.cc (vect_analyze_loop): Fix mode skipping for epilogue
vectorization.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/tree-vect-loop.cc | 24 |
1 files changed, 19 insertions, 5 deletions
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 0b2785a..4860bfd 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -3004,6 +3004,12 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) unsigned int mode_i = 0; unsigned HOST_WIDE_INT simdlen = loop->simdlen; + /* Keep track of the VF for each mode. Initialize all to 0 which indicates + a mode has not been analyzed. */ + auto_vec<poly_uint64, 8> cached_vf_per_mode; + for (unsigned i = 0; i < vector_modes.length (); ++i) + cached_vf_per_mode.safe_push (0); + /* First determine the main loop vectorization mode, either the first one that works, starting with auto-detecting the vector mode and then following the targets order of preference, or the one with the @@ -3011,6 +3017,10 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) while (1) { bool fatal; + unsigned int last_mode_i = mode_i; + /* Set cached VF to -1 prior to analysis, which indicates a mode has + failed. */ + cached_vf_per_mode[last_mode_i] = -1; opt_loop_vec_info loop_vinfo = vect_analyze_loop_1 (loop, shared, &loop_form_info, NULL, vector_modes, mode_i, @@ -3020,6 +3030,12 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) if (loop_vinfo) { + /* Analyzis has been successful so update the VF value. The + VF should always be a multiple of unroll_factor and we want to + capture the original VF here. */ + cached_vf_per_mode[last_mode_i] + = exact_div (LOOP_VINFO_VECT_FACTOR (loop_vinfo), + loop_vinfo->suggested_unroll_factor); /* Once we hit the desired simdlen for the first time, discard any previous attempts. */ if (simdlen @@ -3100,12 +3116,10 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) { /* If the target does not support partial vectors we can shorten the number of modes to analyze for the epilogue as we know we can't pick a - mode that has at least as many NUNITS as the main loop's vectorization - factor, since that would imply the epilogue's vectorization factor - would be at least as high as the main loop's and we would be - vectorizing for more scalar iterations than there would be left. */ + mode that would lead to a VF at least as big as the + FIRST_VINFO_VF. */ if (!supports_partial_vectors - && maybe_ge (GET_MODE_NUNITS (vector_modes[mode_i]), first_vinfo_vf)) + && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf)) { mode_i++; if (mode_i == vector_modes.length ()) |