Diffstat (limited to 'gcc/tree-vect-loop.cc')
 gcc/tree-vect-loop.cc | 180 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 149 insertions(+), 31 deletions(-)
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 568353a..6c24202 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3297,6 +3297,28 @@ reduction_fn_for_scalar_code (code_helper code, internal_fn *reduc_fn)
}
}
+/* Set *SBOOL_FN to the mask-variant internal function corresponding
+ to REDUC_FN. Return true if such a function exists, false otherwise. */
+
+static bool
+sbool_reduction_fn_for_fn (internal_fn reduc_fn, internal_fn *sbool_fn)
+{
+ switch (reduc_fn)
+ {
+ case IFN_REDUC_AND:
+ *sbool_fn = IFN_REDUC_SBOOL_AND;
+ return true;
+ case IFN_REDUC_IOR:
+ *sbool_fn = IFN_REDUC_SBOOL_IOR;
+ return true;
+ case IFN_REDUC_XOR:
+ *sbool_fn = IFN_REDUC_SBOOL_XOR;
+ return true;
+ default:
+ return false;
+ }
+}
+
/* If there is a neutral value X such that a reduction would not be affected
by the introduction of additional X elements, return that X, otherwise
return null. CODE is the code of the reduction and SCALAR_TYPE is type
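
The three mappings above pair each integer reduction with its mask-mode
variant: REDUC_SBOOL_AND is an "all lanes set" reduction, REDUC_SBOOL_IOR
"any lane set", and REDUC_SBOOL_XOR the parity of the set lanes. A minimal
scalar sketch of those semantics (plain C++, independent of the GCC
internals above):

#include <cstdio>

/* Scalar models of the three mask reductions: AND = all lanes set,
   IOR = any lane set, XOR = odd number of lanes set (parity).  */
static bool reduc_and (const bool *m, int n)
{ bool r = true;  for (int i = 0; i < n; i++) r &= m[i]; return r; }
static bool reduc_ior (const bool *m, int n)
{ bool r = false; for (int i = 0; i < n; i++) r |= m[i]; return r; }
static bool reduc_xor (const bool *m, int n)
{ bool r = false; for (int i = 0; i < n; i++) r ^= m[i]; return r; }

int main ()
{
  bool mask[4] = { true, false, true, true };
  printf ("and=%d ior=%d xor=%d\n",
          reduc_and (mask, 4), reduc_ior (mask, 4), reduc_xor (mask, 4));
  return 0;
}
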
@@ -4902,17 +4924,16 @@ get_initial_defs_for_reduction (loop_vec_info loop_vinfo,
if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
nunits = group_size;
+ tree vector_elt_type = TREE_TYPE (vector_type);
number_of_places_left_in_vector = nunits;
bool constant_p = true;
tree_vector_builder elts (vector_type, nunits, 1);
elts.quick_grow (nunits);
gimple_seq ctor_seq = NULL;
if (neutral_op
- && !useless_type_conversion_p (TREE_TYPE (vector_type),
+ && !useless_type_conversion_p (vector_elt_type,
TREE_TYPE (neutral_op)))
- neutral_op = gimple_convert (&ctor_seq,
- TREE_TYPE (vector_type),
- neutral_op);
+ neutral_op = gimple_convert (&ctor_seq, vector_elt_type, neutral_op);
for (j = 0; j < nunits * number_of_vectors; ++j)
{
tree op;
@@ -4924,11 +4945,22 @@ get_initial_defs_for_reduction (loop_vec_info loop_vinfo,
op = neutral_op;
else
{
- if (!useless_type_conversion_p (TREE_TYPE (vector_type),
+ if (!useless_type_conversion_p (vector_elt_type,
TREE_TYPE (initial_values[i])))
- initial_values[i] = gimple_convert (&ctor_seq,
- TREE_TYPE (vector_type),
- initial_values[i]);
+ {
+ if (VECTOR_BOOLEAN_TYPE_P (vector_type))
+ initial_values[i] = gimple_build (&ctor_seq, COND_EXPR,
+ vector_elt_type,
+ initial_values[i],
+ build_all_ones_cst
+ (vector_elt_type),
+ build_zero_cst
+ (vector_elt_type));
+ else
+ initial_values[i] = gimple_convert (&ctor_seq,
+ vector_elt_type,
+ initial_values[i]);
+ }
op = initial_values[i];
}
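
For boolean vector types the element conversion above cannot be a plain
gimple_convert: a "true" mask element must be the all-ones value of the
element type, not 1, so the COND_EXPR selects between build_all_ones_cst
and build_zero_cst. A scalar sketch of that lowering, assuming an 8-bit
mask element type:

#include <cstdint>
#include <cstdio>

int main ()
{
  bool init = true;
  /* COND_EXPR (init, all-ones, zero): true becomes the all-ones mask
     element (-1, i.e. 0xff for an 8-bit element), false becomes 0.  */
  int8_t mask_elt = init ? int8_t (-1) : int8_t (0);
  printf ("mask element = %d\n", mask_elt);
  return 0;
}
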
@@ -5482,6 +5514,15 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
&& slp_reduc
&& !TYPE_VECTOR_SUBPARTS (vectype).is_constant ());
+ /* If signed overflow is undefined we might need to perform reduction
+ computations in an unsigned type. */
+ tree compute_vectype = vectype;
+ if (ANY_INTEGRAL_TYPE_P (vectype)
+ && TYPE_OVERFLOW_UNDEFINED (vectype)
+ && code.is_tree_code ()
+ && arith_code_with_undefined_signed_overflow ((tree_code) code))
+ compute_vectype = unsigned_type_for (vectype);
+
/* In case of reduction chain, e.g.,
# a1 = phi <a3, a0>
a2 = operation (a1)
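
Reassociating a signed reduction, as the vectorizer does, can create
intermediate sums that overflow even when the original serial sum does
not, and signed overflow is undefined for these codes. Doing the vector
arithmetic in the corresponding unsigned type makes any wraparound well
defined; converting back at the end restores the signed result. A scalar
sketch of the discipline:

#include <cstdio>

/* Sum in unsigned arithmetic so intermediate wraparound caused by the
   reassociation is well defined, then convert back; this mirrors the
   unsigned_type_for (vectype) switch above.  */
static int sum_two_lanes (const int *a, int n)
{
  unsigned acc0 = 0, acc1 = 0;          /* two "vector lanes"  */
  for (int i = 0; i < n; i += 2)
    {
      acc0 += (unsigned) a[i];
      acc1 += (unsigned) a[i + 1];
    }
  return (int) (acc0 + acc1);
}

int main ()
{
  /* The serial sum stays in range, but lane 0 alone would overflow
     INT_MAX if the additions were done in the signed type.  */
  int a[4] = { 0x7fffffff, -1, 1, -2 };
  printf ("%d\n", sum_two_lanes (a, 4));        /* 2147483645  */
  return 0;
}
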
@@ -5501,16 +5542,27 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
a multiple of the SLP group size.
The same is true if we couldn't use a single defuse cycle. */
- if (!slp_reduc
- || direct_slp_reduc
- || (slp_reduc
- && constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype), group_size)))
+ if ((!slp_reduc
+ || direct_slp_reduc
+ || (slp_reduc
+ && constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype), group_size)))
+ && reduc_inputs.length () > 1)
{
gimple_seq stmts = NULL;
tree single_input = reduc_inputs[0];
+ if (compute_vectype != vectype)
+ single_input = gimple_build (&stmts, VIEW_CONVERT_EXPR,
+ compute_vectype, single_input);
for (k = 1; k < reduc_inputs.length (); k++)
- single_input = gimple_build (&stmts, code, vectype,
- single_input, reduc_inputs[k]);
+ {
+ tree input = gimple_build (&stmts, VIEW_CONVERT_EXPR,
+ compute_vectype, reduc_inputs[k]);
+ single_input = gimple_build (&stmts, code, compute_vectype,
+ single_input, input);
+ }
+ if (compute_vectype != vectype)
+ single_input = gimple_build (&stmts, VIEW_CONVERT_EXPR,
+ vectype, single_input);
gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
reduc_inputs.truncate (0);
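
With more than one accumulator vector, the loop above first view-converts
every input to the compute type, combines them lane-wise into a single
vector, and converts the result back; the new reduc_inputs.length () > 1
guard skips all of this when there is nothing to combine. A lane-wise
sketch with plain arrays standing in for vectors:

#include <cstdio>

int main ()
{
  /* Three accumulator "vectors" of four lanes each.  */
  int acc[3][4] = { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 10, 11, 12 } };
  unsigned single[4];                   /* single_input, compute type  */
  for (int l = 0; l < 4; l++)
    single[l] = (unsigned) acc[0][l];   /* VIEW_CONVERT of input 0  */
  for (int k = 1; k < 3; k++)           /* skipped when length () == 1  */
    for (int l = 0; l < 4; l++)
      single[l] += (unsigned) acc[k][l];
  for (int l = 0; l < 4; l++)
    printf ("%u ", single[l]);          /* 15 18 21 24  */
  printf ("\n");
  return 0;
}
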
@@ -5549,6 +5601,22 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
/* Shouldn't be used beyond this point. */
exit_bb = nullptr;
+ /* If we are operating on a mask vector and do not support direct mask
+ reduction, work on a bool data vector instead of a mask vector. */
+ if (VECTOR_BOOLEAN_TYPE_P (vectype)
+ && VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info)
+ && vectype != VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info))
+ {
+ gcc_assert (reduc_inputs.length () == 1);
+ vectype = VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info);
+ gimple_seq stmts = NULL;
+ reduc_inputs[0] = gimple_build (&stmts, VEC_COND_EXPR, vectype,
+ reduc_inputs[0],
+ build_one_cst (vectype),
+ build_zero_cst (vectype));
+ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
+ }
+
if (VECT_REDUC_INFO_TYPE (reduc_info) == COND_REDUCTION
&& reduc_fn != IFN_LAST)
{
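
When the mask mode has no direct reduction, the VEC_COND_EXPR above
materialises the mask as an ordinary data vector of ones and zeros in
VECT_REDUC_INFO_VECTYPE_FOR_MASK, which the existing data-vector epilogue
can then reduce. A lane-wise sketch:

#include <cstdio>

int main ()
{
  bool mask[4] = { true, false, true, true };
  int data[4];
  /* VEC_COND_EXPR (mask, 1, 0): set lanes become 1, clear lanes 0,
     turning the mask into a reducible data vector.  */
  for (int l = 0; l < 4; l++)
    data[l] = mask[l] ? 1 : 0;
  int any = 0, all = 1, parity = 0;
  for (int l = 0; l < 4; l++)
    {
      any |= data[l];
      all &= data[l];
      parity ^= data[l];
    }
  printf ("any=%d all=%d parity=%d\n", any, all, parity);  /* 1 0 1  */
  return 0;
}
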
@@ -5857,6 +5925,18 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
halves against each other. */
enum machine_mode mode1 = mode;
tree stype = TREE_TYPE (vectype);
+ if (compute_vectype != vectype)
+ {
+ stype = unsigned_type_for (stype);
+ gimple_seq stmts = NULL;
+ for (unsigned i = 0; i < reduc_inputs.length (); ++i)
+ {
+ tree new_temp = gimple_build (&stmts, VIEW_CONVERT_EXPR,
+ compute_vectype, reduc_inputs[i]);
+ reduc_inputs[i] = new_temp;
+ }
+ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
+ }
unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
unsigned nunits1 = nunits;
if ((mode1 = targetm.vectorize.split_reduction (mode)) != mode
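
The "halves against each other" scheme folds the upper half of the vector
into the lower half, halving the live lanes each step, so a full reduction
takes log2(nunits) combining operations; the view-converts inserted above
merely move those operations into the unsigned compute type first. A
sketch of the halving:

#include <cstdio>

int main ()
{
  unsigned v[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  /* Fold the upper half onto the lower half until one lane is left:
     8 -> 4 -> 2 -> 1 lanes, three combining steps for nunits == 8.  */
  for (int half = 8 / 2; half >= 1; half /= 2)
    for (int l = 0; l < half; l++)
      v[l] += v[l + half];
  printf ("sum = %u\n", v[0]);          /* 36  */
  return 0;
}
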
@@ -5943,8 +6023,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
new_temp = gimple_build (&stmts, BIT_FIELD_REF, TREE_TYPE (vectype1),
new_temp, bitsize, bitsize_zero_node);
- new_temp = gimple_build (&stmts, VIEW_CONVERT_EXPR,
- scalar_type, new_temp);
+ new_temp = gimple_convert (&stmts, scalar_type, new_temp);
gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
scalar_results.safe_push (new_temp);
}
@@ -6074,10 +6153,11 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
{
new_temp = scalar_results[0];
gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
- adjustment_def = gimple_convert (&stmts, TREE_TYPE (vectype),
+ adjustment_def = gimple_convert (&stmts, TREE_TYPE (compute_vectype),
adjustment_def);
- new_temp = gimple_convert (&stmts, TREE_TYPE (vectype), new_temp);
- new_temp = gimple_build (&stmts, code, TREE_TYPE (vectype),
+ new_temp = gimple_convert (&stmts, TREE_TYPE (compute_vectype),
+ new_temp);
+ new_temp = gimple_build (&stmts, code, TREE_TYPE (compute_vectype),
new_temp, adjustment_def);
new_temp = gimple_convert (&stmts, scalar_type, new_temp);
}
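
The adjustment_def is the part of the initial value that was held back
from the vector loop; here it is combined with the computed scalar, and
the switch to TREE_TYPE (compute_vectype) keeps that final operation in
the overflow-safe unsigned element type as well. A one-line sketch:

#include <cstdio>

int main ()
{
  int reduced = 40;     /* scalar result out of the vector epilogue  */
  int adjustment = 2;   /* initial-value part kept out of the loop   */
  /* Convert both operands to the unsigned compute element type, apply
     the reduction code, then convert back to the scalar type.  */
  int result = (int) ((unsigned) reduced + (unsigned) adjustment);
  printf ("%d\n", result);      /* 42  */
  return 0;
}
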
@@ -7017,15 +7097,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
tree vectype_out = SLP_TREE_VECTYPE (slp_for_stmt_info);
VECT_REDUC_INFO_VECTYPE (reduc_info) = vectype_out;
- /* We do not handle mask reductions correctly in the epilogue. */
- if (VECTOR_BOOLEAN_TYPE_P (vectype_out))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "mask reduction not supported.\n");
- return false;
- }
-
gimple_match_op op;
if (!gimple_extract_op (stmt_info->stmt, &op))
gcc_unreachable ();
@@ -7343,6 +7414,23 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
return false;
}
+ /* See if we can convert a mask vector to a corresponding bool data vector
+ to perform the epilogue reduction. */
+ tree alt_vectype_out = NULL_TREE;
+ if (VECTOR_BOOLEAN_TYPE_P (vectype_out))
+ {
+ alt_vectype_out
+ = get_related_vectype_for_scalar_type (loop_vinfo->vector_mode,
+ TREE_TYPE (vectype_out),
+ TYPE_VECTOR_SUBPARTS
+ (vectype_out));
+ if (!alt_vectype_out
+ || maybe_ne (TYPE_VECTOR_SUBPARTS (alt_vectype_out),
+ TYPE_VECTOR_SUBPARTS (vectype_out))
+ || !expand_vec_cond_expr_p (alt_vectype_out, vectype_out))
+ alt_vectype_out = NULL_TREE;
+ }
+
internal_fn reduc_fn = IFN_LAST;
if (reduction_type == TREE_CODE_REDUCTION
|| reduction_type == FOLD_LEFT_REDUCTION
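
A usable replacement vectype must pair lanes one-to-one with the mask,
hence the maybe_ne check on TYPE_VECTOR_SUBPARTS, and the target must be
able to expand the mask-driven select, hence expand_vec_cond_expr_p. A
sketch of the two conditions (the lane counts and the hook result are
hypothetical stand-ins):

#include <cstdio>

/* Model of the check above: same lane count as the mask, and the
   target can expand a VEC_COND of the data vectype under the mask.  */
static bool alt_vectype_usable_p (unsigned mask_lanes, unsigned data_lanes,
                                  bool target_can_vcond)
{
  if (mask_lanes != data_lanes) /* the maybe_ne (SUBPARTS, SUBPARTS)  */
    return false;
  return target_can_vcond;      /* expand_vec_cond_expr_p stand-in    */
}

int main ()
{
  printf ("%d %d\n",
          alt_vectype_usable_p (4, 4, true),    /* usable      */
          alt_vectype_usable_p (4, 8, true));   /* lane clash  */
  return 0;
}
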
@@ -7353,9 +7441,26 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
? fold_left_reduction_fn (orig_code, &reduc_fn)
: reduction_fn_for_scalar_code (orig_code, &reduc_fn))
{
- if (reduc_fn != IFN_LAST
- && !direct_internal_fn_supported_p (reduc_fn, vectype_out,
- OPTIMIZE_FOR_SPEED))
+ internal_fn sbool_fn = IFN_LAST;
+ if (reduc_fn == IFN_LAST)
+ ;
+ else if ((!VECTOR_BOOLEAN_TYPE_P (vectype_out)
+ || (GET_MODE_CLASS (TYPE_MODE (vectype_out))
+ == MODE_VECTOR_BOOL))
+ && direct_internal_fn_supported_p (reduc_fn, vectype_out,
+ OPTIMIZE_FOR_SPEED))
+ ;
+ else if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
+ && sbool_reduction_fn_for_fn (reduc_fn, &sbool_fn)
+ && direct_internal_fn_supported_p (sbool_fn, vectype_out,
+ OPTIMIZE_FOR_SPEED))
+ reduc_fn = sbool_fn;
+ else if (reduction_type != FOLD_LEFT_REDUCTION
+ && alt_vectype_out
+ && direct_internal_fn_supported_p (reduc_fn, alt_vectype_out,
+ OPTIMIZE_FOR_SPEED))
+ VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info) = alt_vectype_out;
+ else
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
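
The rewritten condition forms a ladder of strategies: use reduc_fn
directly when the type is not a mask (the full condition also admits
masks whose mode is a dedicated MODE_VECTOR_BOOL), otherwise try the
IFN_REDUC_SBOOL_* variant, otherwise fall back to reducing the related
data vector (not available to fold-left reductions), and only then report
the reduction as unsupported. A compact model of that control flow (the
predicate parameters are hypothetical stand-ins for the checks above):

#include <cstdio>

enum strategy { DIRECT, SBOOL, DATA_VECTOR, UNSUPPORTED };

/* Simplified model of the capability ladder above.  */
static strategy
pick_epilogue_strategy (bool is_mask, bool direct_ok, bool sbool_ok,
                        bool have_alt_vectype)
{
  if (!is_mask && direct_ok)
    return DIRECT;              /* reduc_fn on vectype_out itself  */
  if (is_mask && sbool_ok)
    return SBOOL;               /* IFN_REDUC_SBOOL_* on the mask   */
  if (have_alt_vectype)
    return DATA_VECTOR;         /* reduc_fn on alt_vectype_out     */
  return UNSUPPORTED;           /* dump "reduc op not supported"   */
}

int main ()
{
  printf ("%d\n", pick_epilogue_strategy (true, false, false, true));
  return 0;
}
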
@@ -7372,6 +7477,19 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
return false;
}
+ if (reduc_fn == IFN_LAST
+ && VECTOR_BOOLEAN_TYPE_P (vectype_out))
+ {
+ if (!alt_vectype_out)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot turn mask into bool data vector for "
+ "reduction epilogue.\n");
+ return false;
+ }
+ VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info) = alt_vectype_out;
+ }
}
else if (reduction_type == COND_REDUCTION)
{