aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Feng Xue <fxue@os.amperecomputing.com> 2024-05-29 13:12:12 +0800
committer Feng Xue <fxue@os.amperecomputing.com> 2024-06-01 08:31:43 +0800
commit c0f31701556c4162463f28bc0f03007f40a6176e (patch)
tree 6d3c21cd28a6d3541d798525de3f5e301127924e
parent 3c75a4c03da78b434603bc0bd88a1395c3c5b25c (diff)
download gcc-c0f31701556c4162463f28bc0f03007f40a6176e.zip
gcc-c0f31701556c4162463f28bc0f03007f40a6176e.tar.gz
gcc-c0f31701556c4162463f28bc0f03007f40a6176e.tar.bz2
vect: Add a function to check lane-reducing code
Checking whether an operation is lane-reducing requires comparing its code against three kinds (DOT_PROD_EXPR/WIDEN_SUM_EXPR/SAD_EXPR). Add a utility function to make the check handy and concise in the source. 2024-05-29 Feng Xue <fxue@os.amperecomputing.com> gcc/ * tree-vectorizer.h (lane_reducing_op_p): New function. * tree-vect-slp.cc (vect_analyze_slp): Use new function lane_reducing_op_p to check statement code. * tree-vect-loop.cc (vect_transform_reduction): Likewise. (vectorizable_reduction): Likewise, and change name of a local variable that holds the result flag.
-rw-r--r-- gcc/tree-vect-loop.cc 29
-rw-r--r-- gcc/tree-vect-slp.cc 4
-rw-r--r-- gcc/tree-vectorizer.h 6
3 files changed, 19 insertions, 20 deletions
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 04a9ac6..a42d79c 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7650,9 +7650,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
gimple_match_op op;
if (!gimple_extract_op (stmt_info->stmt, &op))
gcc_unreachable ();
- bool lane_reduc_code_p = (op.code == DOT_PROD_EXPR
- || op.code == WIDEN_SUM_EXPR
- || op.code == SAD_EXPR);
+ bool lane_reducing = lane_reducing_op_p (op.code);
if (!POINTER_TYPE_P (op.type) && !INTEGRAL_TYPE_P (op.type)
&& !SCALAR_FLOAT_TYPE_P (op.type))
@@ -7664,7 +7662,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
/* For lane-reducing ops we're reducing the number of reduction PHIs
which means the only use of that may be in the lane-reducing operation. */
- if (lane_reduc_code_p
+ if (lane_reducing
&& reduc_chain_length != 1
&& !only_slp_reduc_chain)
{
@@ -7678,7 +7676,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
since we'll mix lanes belonging to different reductions. But it's
OK to use them in a reduction chain or when the reduction group
has just one element. */
- if (lane_reduc_code_p
+ if (lane_reducing
&& slp_node
&& !REDUC_GROUP_FIRST_ELEMENT (stmt_info)
&& SLP_TREE_LANES (slp_node) > 1)
@@ -7738,7 +7736,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
/* To properly compute ncopies we are interested in the widest
non-reduction input type in case we're looking at a widening
accumulation that we later handle in vect_transform_reduction. */
- if (lane_reduc_code_p
+ if (lane_reducing
&& vectype_op[i]
&& (!vectype_in
|| (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
@@ -8211,7 +8209,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
&& loop_vinfo->suggested_unroll_factor == 1)
single_defuse_cycle = true;
- if (single_defuse_cycle || lane_reduc_code_p)
+ if (single_defuse_cycle || lane_reducing)
{
gcc_assert (op.code != COND_EXPR);
@@ -8227,7 +8225,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
mixed-sign dot-products can be implemented using signed
dot-products. */
machine_mode vec_mode = TYPE_MODE (vectype_in);
- if (!lane_reduc_code_p
+ if (!lane_reducing
&& !directly_supported_p (op.code, vectype_in, optab_vector))
{
if (dump_enabled_p ())
@@ -8252,7 +8250,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
For the other cases try without the single cycle optimization. */
if (!ok)
{
- if (lane_reduc_code_p)
+ if (lane_reducing)
return false;
else
single_defuse_cycle = false;
@@ -8263,7 +8261,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
/* If the reduction stmt is one of the patterns that have lane
reduction embedded we cannot handle the case of ! single_defuse_cycle. */
if ((ncopies > 1 && ! single_defuse_cycle)
- && lane_reduc_code_p)
+ && lane_reducing)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -8274,7 +8272,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
if (slp_node
&& !(!single_defuse_cycle
- && !lane_reduc_code_p
+ && !lane_reducing
&& reduction_type != FOLD_LEFT_REDUCTION))
for (i = 0; i < (int) op.num_ops; i++)
if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_op[i]))
@@ -8295,7 +8293,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
/* Cost the reduction op inside the loop if transformed via
vect_transform_reduction. Otherwise this is costed by the
separate vectorizable_* routines. */
- if (single_defuse_cycle || lane_reduc_code_p)
+ if (single_defuse_cycle || lane_reducing)
{
int factor = 1;
if (vect_is_emulated_mixed_dot_prod (loop_vinfo, stmt_info))
@@ -8313,7 +8311,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
/* All but single defuse-cycle optimized, lane-reducing and fold-left
reductions go through their own vectorizable_* routines. */
if (!single_defuse_cycle
- && !lane_reduc_code_p
+ && !lane_reducing
&& reduction_type != FOLD_LEFT_REDUCTION)
{
stmt_vec_info tem
@@ -8555,10 +8553,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
}
bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info);
- gcc_assert (single_defuse_cycle
- || code == DOT_PROD_EXPR
- || code == WIDEN_SUM_EXPR
- || code == SAD_EXPR);
+ gcc_assert (single_defuse_cycle || lane_reducing_op_p (code));
/* Create the destination vector */
tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index bc7a85d..bf1f467 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3928,9 +3928,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
/* Do not discover SLP reductions for lane-reducing ops, that
will fail later. */
&& (!(g = dyn_cast <gassign *> (STMT_VINFO_STMT (next_info)))
- || (gimple_assign_rhs_code (g) != DOT_PROD_EXPR
- && gimple_assign_rhs_code (g) != WIDEN_SUM_EXPR
- && gimple_assign_rhs_code (g) != SAD_EXPR)))
+ || !lane_reducing_op_p (gimple_assign_rhs_code (g))))
scalar_stmts.quick_push (next_info);
}
if (scalar_stmts.length () > 1)
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 4798234..97ec9c3 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2169,6 +2169,12 @@ vect_apply_runtime_profitability_check_p (loop_vec_info loop_vinfo)
&& th >= vect_vf_for_cost (loop_vinfo));
}
+inline bool
+lane_reducing_op_p (code_helper code)
+{
+ return code == DOT_PROD_EXPR || code == WIDEN_SUM_EXPR || code == SAD_EXPR;
+}
+
/* Source location + hotness information. */
extern dump_user_location_t vect_location;