diff options
author | Richard Biener <rguenther@suse.de> | 2023-08-15 15:17:08 +0200 |
---|---|---|
committer | Richard Biener <rguenther@suse.de> | 2023-08-17 13:16:22 +0200 |
commit | 99b5921bfc8f9122fb7ccb0a4284179fd3a2f2ba (patch) | |
tree | f85a48ef714a413598269c83682a67da1ce7f9f4 /gcc/tree-vect-slp.cc | |
parent | 17d670dbca16d8ccb502c5d82c67981b1ba937af (diff) | |
download | gcc-99b5921bfc8f9122fb7ccb0a4284179fd3a2f2ba.zip gcc-99b5921bfc8f9122fb7ccb0a4284179fd3a2f2ba.tar.gz gcc-99b5921bfc8f9122fb7ccb0a4284179fd3a2f2ba.tar.bz2 |
Handle TYPE_OVERFLOW_UNDEFINED vectorized BB reductions
The following changes the gate to perform vectorization of BB reductions
to use needs_fold_left_reduction_p which in turn requires handling
TYPE_OVERFLOW_UNDEFINED types in the epilogue code generation by
promoting any operations generated there to use unsigned arithmetic.
The following does this, there's currently only v16qi where x86
supports a .REDUC_PLUS reduction for integral modes so I had to
add a x86 specific testcase using GIMPLE IL.
* tree-vect-slp.cc (vect_slp_check_for_roots): Use
!needs_fold_left_reduction_p to decide whether we can
handle the reduction with association.
(vectorize_slp_instance_root_stmt): For TYPE_OVERFLOW_UNDEFINED
reductions perform all arithmetic in an unsigned type.
* gcc.target/i386/vect-reduc-2.c: New testcase.
Diffstat (limited to 'gcc/tree-vect-slp.cc')
-rw-r--r-- | gcc/tree-vect-slp.cc | 46 |
1 files changed, 32 insertions, 14 deletions
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index cf1f8f9..89c3216 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -7217,13 +7217,10 @@ vect_slp_check_for_roots (bb_vec_info bb_vinfo) } else if (!VECTOR_TYPE_P (TREE_TYPE (rhs)) && (associative_tree_code (code) || code == MINUS_EXPR) - /* ??? The flag_associative_math and TYPE_OVERFLOW_WRAPS - checks pessimize a two-element reduction. PR54400. + /* ??? This pessimizes a two-element reduction. PR54400. ??? In-order reduction could be handled if we only traverse one operand chain in vect_slp_linearize_chain. */ - && ((FLOAT_TYPE_P (TREE_TYPE (rhs)) && flag_associative_math) - || (INTEGRAL_TYPE_P (TREE_TYPE (rhs)) - && TYPE_OVERFLOW_WRAPS (TREE_TYPE (rhs)))) + && !needs_fold_left_reduction_p (TREE_TYPE (rhs), code) /* Ops with constants at the tail can be stripped here. */ && TREE_CODE (rhs) == SSA_NAME && TREE_CODE (gimple_assign_rhs2 (assign)) == SSA_NAME @@ -9161,9 +9158,25 @@ vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance) /* We may end up with more than one vector result, reduce them to one vector. */ tree vec_def = vec_defs[0]; + tree vectype = TREE_TYPE (vec_def); + tree compute_vectype = vectype; + bool pun_for_overflow_p = (ANY_INTEGRAL_TYPE_P (vectype) + && TYPE_OVERFLOW_UNDEFINED (vectype)); + if (pun_for_overflow_p) + { + compute_vectype = unsigned_type_for (vectype); + vec_def = gimple_build (&epilogue, VIEW_CONVERT_EXPR, + compute_vectype, vec_def); + } for (unsigned i = 1; i < vec_defs.length (); ++i) - vec_def = gimple_build (&epilogue, reduc_code, TREE_TYPE (vec_def), - vec_def, vec_defs[i]); + { + tree def = vec_defs[i]; + if (pun_for_overflow_p) + def = gimple_build (&epilogue, VIEW_CONVERT_EXPR, + compute_vectype, def); + vec_def = gimple_build (&epilogue, reduc_code, compute_vectype, + vec_def, def); + } vec_defs.release (); /* ??? Support other schemes than direct internal fn. */ internal_fn reduc_fn; @@ -9171,21 +9184,26 @@ vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance) || reduc_fn == IFN_LAST) gcc_unreachable (); tree scalar_def = gimple_build (&epilogue, as_combined_fn (reduc_fn), - TREE_TYPE (TREE_TYPE (vec_def)), vec_def); + TREE_TYPE (compute_vectype), vec_def); if (!SLP_INSTANCE_REMAIN_DEFS (instance).is_empty ()) { tree rem_def = NULL_TREE; for (auto def : SLP_INSTANCE_REMAIN_DEFS (instance)) - if (!rem_def) - rem_def = def; - else - rem_def = gimple_build (&epilogue, reduc_code, - TREE_TYPE (scalar_def), - rem_def, def); + { + def = gimple_convert (&epilogue, TREE_TYPE (scalar_def), def); + if (!rem_def) + rem_def = def; + else + rem_def = gimple_build (&epilogue, reduc_code, + TREE_TYPE (scalar_def), + rem_def, def); + } scalar_def = gimple_build (&epilogue, reduc_code, TREE_TYPE (scalar_def), scalar_def, rem_def); } + scalar_def = gimple_convert (&epilogue, + TREE_TYPE (vectype), scalar_def); gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmts[0]->stmt); gsi_insert_seq_before (&rgsi, epilogue, GSI_SAME_STMT); gimple_assign_set_rhs_from_tree (&rgsi, scalar_def); |