Diffstat (limited to 'gcc/tree-vect-loop.cc')
-rw-r--r-- | gcc/tree-vect-loop.cc | 82
1 file changed, 52 insertions, 30 deletions
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 6c24202..9320bf8 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -5219,6 +5219,15 @@ vect_create_partial_epilog (tree vec_def, tree vectype, code_helper code,
       new_temp = gimple_build (seq, code, vectype1, dst1, dst2);
     }
 
+  if (!useless_type_conversion_p (vectype, TREE_TYPE (new_temp)))
+    {
+      tree dst3 = make_ssa_name (vectype);
+      gimple *epilog_stmt = gimple_build_assign (dst3, VIEW_CONVERT_EXPR,
+                                                 build1 (VIEW_CONVERT_EXPR,
+                                                         vectype, new_temp));
+      gimple_seq_add_stmt_without_update (seq, epilog_stmt);
+      new_temp = dst3;
+    }
   return new_temp;
 }
 
@@ -5607,13 +5616,13 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
       && VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info)
       && vectype != VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info))
     {
-      gcc_assert (reduc_inputs.length () == 1);
-      vectype = VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info);
+      compute_vectype = vectype = VECT_REDUC_INFO_VECTYPE_FOR_MASK (reduc_info);
       gimple_seq stmts = NULL;
-      reduc_inputs[0] = gimple_build (&stmts, VEC_COND_EXPR, vectype,
-                                      reduc_inputs[0],
-                                      build_one_cst (vectype),
-                                      build_zero_cst (vectype));
+      for (unsigned i = 0; i < reduc_inputs.length (); ++i)
+        reduc_inputs[i] = gimple_build (&stmts, VEC_COND_EXPR, vectype,
+                                        reduc_inputs[i],
+                                        build_one_cst (vectype),
+                                        build_zero_cst (vectype));
       gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
     }
 
@@ -5954,25 +5963,29 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
           gcc_assert (exact_log2 (nunits1) != -1 && nunits1 <= nunits);
         }
     }
-  if (!slp_reduc
-      && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
+  else if (!slp_reduc
+           && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
     nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
 
-  tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
-                                                       stype, nunits1);
+  tree vectype1 = vectype;
+  if (mode1 != mode)
+    {
+      vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
+                                                      stype, nunits1);
+      /* First reduce the vector to the desired vector size we should
+         do shift reduction on by combining upper and lower halves.  */
+      gimple_seq stmts = NULL;
+      new_temp = vect_create_partial_epilog (reduc_inputs[0], vectype1,
+                                             code, &stmts);
+      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
+      reduc_inputs[0] = new_temp;
+    }
+
   reduce_with_shift = have_whole_vector_shift (mode1);
   if (!VECTOR_MODE_P (mode1)
      || !directly_supported_p (code, vectype1))
    reduce_with_shift = false;
 
-  /* First reduce the vector to the desired vector size we should
-     do shift reduction on by combining upper and lower halves.  */
-  gimple_seq stmts = NULL;
-  new_temp = vect_create_partial_epilog (reduc_inputs[0], vectype1,
-                                         code, &stmts);
-  gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
-  reduc_inputs[0] = new_temp;
-
   if (reduce_with_shift && (!slp_reduc || group_size == 1))
     {
       tree bitsize = TYPE_SIZE (TREE_TYPE (vectype1));
@@ -6000,7 +6013,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
                          "Reduce using vector shifts\n");
 
       gimple_seq stmts = NULL;
-      new_temp = gimple_convert (&stmts, vectype1, new_temp);
+      new_temp = gimple_convert (&stmts, vectype1, reduc_inputs[0]);
       for (elt_offset = nelements / 2;
            elt_offset >= 1;
            elt_offset /= 2)
@@ -6044,13 +6057,13 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
                          "Reduce using scalar code.\n");
 
       tree compute_type = TREE_TYPE (vectype1);
-      tree bitsize = TYPE_SIZE (compute_type);
-      int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
-      int element_bitsize = tree_to_uhwi (bitsize);
+      unsigned vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
+      unsigned element_bitsize = vector_element_bits (vectype1);
+      tree bitsize = bitsize_int (element_bitsize);
       gimple_seq stmts = NULL;
       FOR_EACH_VEC_ELT (reduc_inputs, i, vec_temp)
         {
-          int bit_offset;
+          unsigned bit_offset;
           new_temp = gimple_build (&stmts, BIT_FIELD_REF, compute_type,
                                    vec_temp, bitsize, bitsize_zero_node);
@@ -6392,27 +6405,30 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
      would also allow generalizing this for reduction paths of length > 1
      and/or SLP reductions.  */
   slp_tree reduc_node = SLP_TREE_CHILDREN (slp_node)[reduc_index];
-  tree reduc_var = vect_get_slp_scalar_def (reduc_node, 0);
+  stmt_vec_info reduc_var_def = SLP_TREE_SCALAR_STMTS (reduc_node)[0];
+  tree reduc_var = gimple_get_lhs (STMT_VINFO_STMT (reduc_var_def));
 
   /* The operands either come from a binary operation or an IFN_COND operation.
      The former is a gimple assign with binary rhs and the latter is a
      gimple call with four arguments.  */
   gcc_assert (num_ops == 2 || num_ops == 4);
 
-  int group_size = 1;
-  stmt_vec_info scalar_dest_def_info;
   auto_vec<tree> vec_oprnds0, vec_opmask;
   vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[(is_cond_op ? 2 : 0)
                                                   + (1 - reduc_index)],
                      &vec_oprnds0);
-  group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
-  scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
   /* For an IFN_COND_OP we also need the vector mask operand.  */
   if (is_cond_op)
     vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], &vec_opmask);
 
-  gimple *sdef = vect_orig_stmt (scalar_dest_def_info)->stmt;
-  tree scalar_dest = gimple_get_lhs (sdef);
+  /* The transform below relies on preserving the original scalar PHI
+     and its latch def which we replace.  So work backwards from there.  */
+  tree scalar_dest
+    = gimple_phi_arg_def_from_edge (as_a <gphi *> (STMT_VINFO_STMT
+                                                     (reduc_var_def)),
+                                    loop_latch_edge (loop));
+  stmt_vec_info scalar_dest_def_info
+    = vect_stmt_to_vectorize (loop_vinfo->lookup_def (scalar_dest));
   tree scalar_type = TREE_TYPE (scalar_dest);
   int vec_num = vec_oprnds0.length ();
 
@@ -7565,6 +7581,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
      values into the low-numbered elements.  */
   if ((double_reduc || neutral_op)
       && !nunits_out.is_constant ()
+      && (SLP_TREE_LANES (slp_node) != 1 && !reduc_chain)
+      && !operand_equal_p (neutral_op, vect_phi_initial_value (reduc_def_phi))
      && !direct_internal_fn_supported_p (IFN_VEC_SHL_INSERT,
                                          vectype_out, OPTIMIZE_FOR_SPEED))
    {
@@ -8211,6 +8229,10 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
     /* Leave the scalar phi in place.  */
     return true;
 
+  if (reduc_info && reduc_info->is_reduc_chain && dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                     "vectorizing a reduction chain\n");
+
   vec_num = vect_get_num_copies (loop_vinfo, slp_node);
 
   /* Check whether we should use a single PHI node and accumulate