diff options
author | Feng Xue <fxue@os.amperecomputing.com> | 2024-05-29 13:12:12 +0800 |
---|---|---|
committer | Feng Xue <fxue@os.amperecomputing.com> | 2024-06-01 08:31:43 +0800 |
commit | c0f31701556c4162463f28bc0f03007f40a6176e (patch) | |
tree | 6d3c21cd28a6d3541d798525de3f5e301127924e | |
parent | 3c75a4c03da78b434603bc0bd88a1395c3c5b25c (diff) | |
download | gcc-c0f31701556c4162463f28bc0f03007f40a6176e.zip gcc-c0f31701556c4162463f28bc0f03007f40a6176e.tar.gz gcc-c0f31701556c4162463f28bc0f03007f40a6176e.tar.bz2 |
vect: Add a function to check lane-reducing code
Checking whether an operation is lane-reducing requires comparing its code
against three kinds (DOT_PROD_EXPR/WIDEN_SUM_EXPR/SAD_EXPR). Add a utility
function to make the source code for this check handy and concise.
2024-05-29 Feng Xue <fxue@os.amperecomputing.com>
gcc/
* tree-vectorizer.h (lane_reducing_op_p): New function.
* tree-vect-slp.cc (vect_analyze_slp): Use new function
lane_reducing_op_p to check statement code.
* tree-vect-loop.cc (vect_transform_reduction): Likewise.
(vectorizable_reduction): Likewise, and change name of a local
variable that holds the result flag.
-rw-r--r-- | gcc/tree-vect-loop.cc | 29 | ||||
-rw-r--r-- | gcc/tree-vect-slp.cc | 4 | ||||
-rw-r--r-- | gcc/tree-vectorizer.h | 6 |
3 files changed, 19 insertions, 20 deletions
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 04a9ac6..a42d79c 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -7650,9 +7650,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, gimple_match_op op; if (!gimple_extract_op (stmt_info->stmt, &op)) gcc_unreachable (); - bool lane_reduc_code_p = (op.code == DOT_PROD_EXPR - || op.code == WIDEN_SUM_EXPR - || op.code == SAD_EXPR); + bool lane_reducing = lane_reducing_op_p (op.code); if (!POINTER_TYPE_P (op.type) && !INTEGRAL_TYPE_P (op.type) && !SCALAR_FLOAT_TYPE_P (op.type)) @@ -7664,7 +7662,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, /* For lane-reducing ops we're reducing the number of reduction PHIs which means the only use of that may be in the lane-reducing operation. */ - if (lane_reduc_code_p + if (lane_reducing && reduc_chain_length != 1 && !only_slp_reduc_chain) { @@ -7678,7 +7676,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, since we'll mix lanes belonging to different reductions. But it's OK to use them in a reduction chain or when the reduction group has just one element. */ - if (lane_reduc_code_p + if (lane_reducing && slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) && SLP_TREE_LANES (slp_node) > 1) @@ -7738,7 +7736,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, /* To properly compute ncopies we are interested in the widest non-reduction input type in case we're looking at a widening accumulation that we later handle in vect_transform_reduction. 
*/ - if (lane_reduc_code_p + if (lane_reducing && vectype_op[i] && (!vectype_in || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) @@ -8211,7 +8209,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, && loop_vinfo->suggested_unroll_factor == 1) single_defuse_cycle = true; - if (single_defuse_cycle || lane_reduc_code_p) + if (single_defuse_cycle || lane_reducing) { gcc_assert (op.code != COND_EXPR); @@ -8227,7 +8225,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, mixed-sign dot-products can be implemented using signed dot-products. */ machine_mode vec_mode = TYPE_MODE (vectype_in); - if (!lane_reduc_code_p + if (!lane_reducing && !directly_supported_p (op.code, vectype_in, optab_vector)) { if (dump_enabled_p ()) @@ -8252,7 +8250,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, For the other cases try without the single cycle optimization. */ if (!ok) { - if (lane_reduc_code_p) + if (lane_reducing) return false; else single_defuse_cycle = false; @@ -8263,7 +8261,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, /* If the reduction stmt is one of the patterns that have lane reduction embedded we cannot handle the case of ! single_defuse_cycle. */ if ((ncopies > 1 && ! single_defuse_cycle) - && lane_reduc_code_p) + && lane_reducing) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -8274,7 +8272,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, if (slp_node && !(!single_defuse_cycle - && !lane_reduc_code_p + && !lane_reducing && reduction_type != FOLD_LEFT_REDUCTION)) for (i = 0; i < (int) op.num_ops; i++) if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_op[i])) @@ -8295,7 +8293,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, /* Cost the reduction op inside the loop if transformed via vect_transform_reduction. Otherwise this is costed by the separate vectorizable_* routines. 
*/ - if (single_defuse_cycle || lane_reduc_code_p) + if (single_defuse_cycle || lane_reducing) { int factor = 1; if (vect_is_emulated_mixed_dot_prod (loop_vinfo, stmt_info)) @@ -8313,7 +8311,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, /* All but single defuse-cycle optimized, lane-reducing and fold-left reductions go through their own vectorizable_* routines. */ if (!single_defuse_cycle - && !lane_reduc_code_p + && !lane_reducing && reduction_type != FOLD_LEFT_REDUCTION) { stmt_vec_info tem @@ -8555,10 +8553,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo, } bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info); - gcc_assert (single_defuse_cycle - || code == DOT_PROD_EXPR - || code == WIDEN_SUM_EXPR - || code == SAD_EXPR); + gcc_assert (single_defuse_cycle || lane_reducing_op_p (code)); /* Create the destination vector */ tree scalar_dest = gimple_get_lhs (stmt_info->stmt); diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index bc7a85d..bf1f467 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -3928,9 +3928,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) /* Do not discover SLP reductions for lane-reducing ops, that will fail later. 
*/ && (!(g = dyn_cast <gassign *> (STMT_VINFO_STMT (next_info))) - || (gimple_assign_rhs_code (g) != DOT_PROD_EXPR - && gimple_assign_rhs_code (g) != WIDEN_SUM_EXPR - && gimple_assign_rhs_code (g) != SAD_EXPR))) + || !lane_reducing_op_p (gimple_assign_rhs_code (g)))) scalar_stmts.quick_push (next_info); } if (scalar_stmts.length () > 1) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 4798234..97ec9c3 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2169,6 +2169,12 @@ vect_apply_runtime_profitability_check_p (loop_vec_info loop_vinfo) && th >= vect_vf_for_cost (loop_vinfo)); } +inline bool +lane_reducing_op_p (code_helper code) +{ + return code == DOT_PROD_EXPR || code == WIDEN_SUM_EXPR || code == SAD_EXPR; +} + /* Source location + hotness information. */ extern dump_user_location_t vect_location; |