diff options
author | Richard Biener <rguenther@suse.de> | 2024-10-17 10:27:58 +0200 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2024-10-17 12:54:22 +0200 |
commit | 1081f4cb34ea22e6ba07ddcb88cada3ec60bc9c4 (patch) | |
tree | 9027dc685e8fdf1c44c9b685a11e35d7fb9785f1 /gcc | |
parent | 8b9b696c98def874139effc0380929df4a4356f0 (diff) | |
download | gcc-1081f4cb34ea22e6ba07ddcb88cada3ec60bc9c4.zip gcc-1081f4cb34ea22e6ba07ddcb88cada3ec60bc9c4.tar.gz gcc-1081f4cb34ea22e6ba07ddcb88cada3ec60bc9c4.tar.bz2 |
tree-optimization/117172 - single lane SLP for non-linear inductions
The following adds single-lane SLP support for vectorizing non-linear
inductions.
This fixes a bunch of i386 specific testcases with --param vect-force-slp=1.
PR tree-optimization/117172
* tree-vect-loop.cc (vectorizable_nonlinear_induction): Add
single-lane SLP support.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/tree-vect-loop.cc | 26 |
1 file changed, 16 insertions, 10 deletions
```diff
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index d1f1edc..50a1531 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10006,10 +10006,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
   gcc_assert (induction_type > vect_step_op_add);
-  if (slp_node)
-    ncopies = 1;
-  else
-    ncopies = vect_get_num_copies (loop_vinfo, vectype);
+  ncopies = vect_get_num_copies (loop_vinfo, slp_node, vectype);
   gcc_assert (ncopies >= 1);
   /* FORNOW. Only handle nonlinear induction in the same loop. */
@@ -10024,9 +10021,10 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
     iv_loop = loop;
   gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);
-  /* TODO: Support slp for nonlinear iv. There should be separate vector iv
-     update for each iv and a permutation to generate wanted vector iv. */
-  if (slp_node)
+  /* TODO: Support multi-lane SLP for nonlinear iv. There should be separate
+     vector iv update for each iv and a permutation to generate wanted
+     vector iv. */
+  if (slp_node && SLP_TREE_LANES (slp_node) > 1)
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -10237,8 +10235,13 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
   add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
                UNKNOWN_LOCATION);
-  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi);
-  *vec_stmt = induction_phi;
+  if (slp_node)
+    slp_node->push_vec_def (induction_phi);
+  else
+    {
+      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi);
+      *vec_stmt = induction_phi;
+    }
   /* In case that vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
@@ -10268,7 +10271,10 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
                                           induction_type);
           gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
           new_stmt = SSA_NAME_DEF_STMT (vec_def);
-          STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+          if (slp_node)
+            slp_node->push_vec_def (new_stmt);
+          else
+            STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
         }
     }
```