Diffstat (limited to 'gcc/tree-vect-loop.cc')
-rw-r--r-- | gcc/tree-vect-loop.cc | 1869
1 file changed, 638 insertions, 1231 deletions
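The most user-visible change in the first hunk below is that a user-requested unroll factor (recorded in loop->unroll, e.g. by "#pragma GCC unroll") is now folded into the vectorizer's suggested_unroll_factor as user_unroll / assumed_vf, and a successful retry of the analysis records this in LOOP_VINFO_USER_UNROLL. As a minimal sketch (not part of the patch), assuming a target that picks a vectorization factor of 4 for this loop:

/* With an assumed VF of 4, the hunk below computes
   suggested_unroll_factor = 16 / 4 = 4 and re-runs the loop analysis
   with that unroll factor, instead of ignoring the pragma when the
   loop is vectorized.  */
void
saxpy (float *restrict x, const float *restrict y, float a, int n)
{
#pragma GCC unroll 16
  for (int i = 0; i < n; i++)
    x[i] += a * y[i];
}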
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 3de1ea6..575987e 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -1069,10 +1069,12 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) using_decrementing_iv_p (false), using_select_vl_p (false), epil_using_partial_vectors_p (false), + allow_mutual_alignment (false), partial_load_store_bias (0), peeling_for_gaps (false), peeling_for_niter (false), early_breaks (false), + user_unroll (false), no_data_dependencies (false), has_mask_store (false), scalar_loop_scaling (profile_probability::uninitialized ()), @@ -3428,27 +3430,50 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, res ? "succeeded" : "failed", GET_MODE_NAME (loop_vinfo->vector_mode)); - if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) && suggested_unroll_factor > 1) + auto user_unroll = LOOP_VINFO_LOOP (loop_vinfo)->unroll; + if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) + /* Check to see if the user wants to unroll or if the target wants to. */ + && (suggested_unroll_factor > 1 || user_unroll > 1)) { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, + if (suggested_unroll_factor == 1) + { + int assumed_vf = vect_vf_for_cost (loop_vinfo); + suggested_unroll_factor = user_unroll / assumed_vf; + if (suggested_unroll_factor > 1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "setting unroll factor to %d based on user requested " + "unroll factor %d and suggested vectorization " + "factor: %d\n", + suggested_unroll_factor, user_unroll, assumed_vf); + } + } + + if (suggested_unroll_factor > 1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "***** Re-trying analysis for unrolling" " with unroll factor %d and slp %s.\n", suggested_unroll_factor, slp_done_for_suggested_uf ? "on" : "off"); - loop_vec_info unroll_vinfo - = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL); - unroll_vinfo->vector_mode = vector_mode; - unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor; - opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL, - slp_done_for_suggested_uf); - if (new_res) - { - delete loop_vinfo; - loop_vinfo = unroll_vinfo; - } - else - delete unroll_vinfo; + loop_vec_info unroll_vinfo + = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL); + unroll_vinfo->vector_mode = vector_mode; + unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor; + opt_result new_res + = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL, + slp_done_for_suggested_uf); + if (new_res) + { + delete loop_vinfo; + loop_vinfo = unroll_vinfo; + LOOP_VINFO_USER_UNROLL (loop_vinfo) = user_unroll > 1; + } + else + delete unroll_vinfo; + } } /* Remember the autodetected vector mode. 
*/ @@ -3469,13 +3494,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, mode_i += 1; } if (mode_i + 1 < vector_modes.length () - && VECTOR_MODE_P (autodetected_vector_mode) - && (related_vector_mode (vector_modes[mode_i + 1], - GET_MODE_INNER (autodetected_vector_mode)) - == autodetected_vector_mode) - && (related_vector_mode (autodetected_vector_mode, - GET_MODE_INNER (vector_modes[mode_i + 1])) - == vector_modes[mode_i + 1])) + && vect_chooses_same_modes_p (autodetected_vector_mode, + vector_modes[mode_i + 1])) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -3676,8 +3696,15 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, vector_modes[0] = autodetected_vector_mode; mode_i = 0; - bool supports_partial_vectors = - partial_vectors_supported_p () && param_vect_partial_vector_usage != 0; + bool supports_partial_vectors = param_vect_partial_vector_usage != 0; + machine_mode mask_mode; + if (supports_partial_vectors + && !partial_vectors_supported_p () + && !(VECTOR_MODE_P (first_loop_vinfo->vector_mode) + && targetm.vectorize.get_mask_mode + (first_loop_vinfo->vector_mode).exists (&mask_mode) + && SCALAR_INT_MODE_P (mask_mode))) + supports_partial_vectors = false; poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo); loop_vec_info orig_loop_vinfo = first_loop_vinfo; @@ -3697,6 +3724,22 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, break; continue; } + /* We would need an exhaustive search to find all modes we + skipped but that would lead to the same result as the + analysis it was skipped for and where we'd could check + cached_vf_per_mode against. + Check for the autodetected mode, which is the common + situation on x86 which does not perform cost comparison. */ + if (!supports_partial_vectors + && maybe_ge (cached_vf_per_mode[0], first_vinfo_vf) + && vect_chooses_same_modes_p (autodetected_vector_mode, + vector_modes[mode_i])) + { + mode_i++; + if (mode_i == vector_modes.length ()) + break; + continue; + } if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -4101,6 +4144,10 @@ pop: if (op.ops[2] == op.ops[opi]) neg = ! neg; } + /* For an FMA the reduction code is the PLUS if the addition chain + is the reduction. */ + else if (op.code == IFN_FMA && opi == 2) + op.code = PLUS_EXPR; if (CONVERT_EXPR_CODE_P (op.code) && tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0]))) ; @@ -4646,7 +4693,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, TODO: Consider assigning different costs to different scalar statements. */ - scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost (); + scalar_single_iter_cost = (loop_vinfo->scalar_costs->total_cost () + * param_vect_scalar_cost_multiplier) / 100; /* Add additional cost for the peeled instructions in prologue and epilogue loop. (For fully-masked loops there will be no peeling.) @@ -5470,76 +5518,6 @@ vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo, } } -/* Function get_initial_def_for_reduction - - Input: - REDUC_INFO - the info_for_reduction - INIT_VAL - the initial value of the reduction variable - NEUTRAL_OP - a value that has no effect on the reduction, as per - neutral_op_for_reduction - - Output: - Return a vector variable, initialized according to the operation that - STMT_VINFO performs. This vector will be used as the initial value - of the vector of partial results. - - The value we need is a vector in which element 0 has value INIT_VAL - and every other element has value NEUTRAL_OP. 
*/ - -static tree -get_initial_def_for_reduction (loop_vec_info loop_vinfo, - stmt_vec_info reduc_info, - tree init_val, tree neutral_op) -{ - class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - tree scalar_type = TREE_TYPE (init_val); - tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); - tree init_def; - gimple_seq stmts = NULL; - - gcc_assert (vectype); - - gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type) - || SCALAR_FLOAT_TYPE_P (scalar_type)); - - gcc_assert (nested_in_vect_loop_p (loop, reduc_info) - || loop == (gimple_bb (reduc_info->stmt))->loop_father); - - if (operand_equal_p (init_val, neutral_op)) - { - /* If both elements are equal then the vector described above is - just a splat. */ - neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype), neutral_op); - init_def = gimple_build_vector_from_val (&stmts, vectype, neutral_op); - } - else - { - neutral_op = gimple_convert (&stmts, TREE_TYPE (vectype), neutral_op); - init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val); - if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()) - { - /* Construct a splat of NEUTRAL_OP and insert INIT_VAL into - element 0. */ - init_def = gimple_build_vector_from_val (&stmts, vectype, - neutral_op); - init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT, - vectype, init_def, init_val); - } - else - { - /* Build {INIT_VAL, NEUTRAL_OP, NEUTRAL_OP, ...}. */ - tree_vector_builder elts (vectype, 1, 2); - elts.quick_push (init_val); - elts.quick_push (neutral_op); - init_def = gimple_build_vector (&stmts, &elts); - } - } - - if (stmts) - vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, stmts); - return init_def; -} - /* Get at the initial defs for the reduction PHIs for REDUC_INFO, which performs a reduction involving GROUP_SIZE scalar statements. NUMBER_OF_VECTORS is the number of vector defs to create. If NEUTRAL_OP @@ -5951,7 +5929,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, loop-closed PHI of the inner loop which we remember as def for the reduction PHI generation. */ bool double_reduc = false; - stmt_vec_info rdef_info = stmt_info; if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) { double_reduc = true; @@ -5981,20 +5958,18 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, auto_vec<tree> reduc_inputs; int j, i; vec<tree> &scalar_results = reduc_info->reduc_scalar_results; - unsigned int group_size = 1, k; + unsigned int k; /* SLP reduction without reduction chain, e.g., # a1 = phi <a2, a0> # b1 = phi <b2, b0> a2 = operation (a1) b2 = operation (b1) */ bool slp_reduc - = (slp_node - && !REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info))); + = !REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info)); bool direct_slp_reduc; tree induction_index = NULL_TREE; - if (slp_node) - group_size = SLP_TREE_LANES (slp_node); + unsigned int group_size = SLP_TREE_LANES (slp_node); if (nested_in_vect_loop_p (loop, stmt_info)) { @@ -6024,18 +5999,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, /* All statements produce live-out values. 
*/ live_out_stmts = SLP_TREE_SCALAR_STMTS (slp_node); - unsigned vec_num; - int ncopies; - if (slp_node) - { - vec_num = SLP_TREE_VEC_DEFS (slp_node_instance->reduc_phis).length (); - ncopies = 1; - } - else - { - vec_num = 1; - ncopies = STMT_VINFO_VEC_STMTS (reduc_info).length (); - } + unsigned vec_num + = SLP_TREE_VEC_DEFS (slp_node_instance->reduc_phis).length (); /* For cond reductions we want to create a new vector (INDEX_COND_EXPR) which is updated with the current index of the loop for every match of @@ -6050,53 +6015,29 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION) { auto_vec<std::pair<tree, bool>, 2> ccompares; - if (slp_node) + slp_tree cond_node = slp_node_instance->root; + while (cond_node != slp_node_instance->reduc_phis) { - slp_tree cond_node = slp_node_instance->root; - while (cond_node != slp_node_instance->reduc_phis) + stmt_vec_info cond_info = SLP_TREE_REPRESENTATIVE (cond_node); + int slp_reduc_idx; + if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR) { - stmt_vec_info cond_info = SLP_TREE_REPRESENTATIVE (cond_node); - int slp_reduc_idx; - if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR) - { - gimple *vec_stmt - = SSA_NAME_DEF_STMT (SLP_TREE_VEC_DEFS (cond_node)[0]); - gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); - ccompares.safe_push - (std::make_pair (gimple_assign_rhs1 (vec_stmt), - STMT_VINFO_REDUC_IDX (cond_info) == 2)); - /* ??? We probably want to have REDUC_IDX on the SLP node? - We have both three and four children COND_EXPR nodes - dependent on whether the comparison is still embedded - as GENERIC. So work backwards. */ - slp_reduc_idx = (SLP_TREE_CHILDREN (cond_node).length () - 3 - + STMT_VINFO_REDUC_IDX (cond_info)); - } - else - slp_reduc_idx = STMT_VINFO_REDUC_IDX (cond_info); - cond_node = SLP_TREE_CHILDREN (cond_node)[slp_reduc_idx]; - } - } - else - { - stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info); - cond_info = vect_stmt_to_vectorize (cond_info); - while (cond_info != reduc_info) - { - if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR) - { - gimple *vec_stmt = STMT_VINFO_VEC_STMTS (cond_info)[0]; - gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); - ccompares.safe_push - (std::make_pair (gimple_assign_rhs1 (vec_stmt), - STMT_VINFO_REDUC_IDX (cond_info) == 2)); - } - cond_info - = loop_vinfo->lookup_def (gimple_op (cond_info->stmt, - 1 + STMT_VINFO_REDUC_IDX - (cond_info))); - cond_info = vect_stmt_to_vectorize (cond_info); + gimple *vec_stmt + = SSA_NAME_DEF_STMT (SLP_TREE_VEC_DEFS (cond_node)[0]); + gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); + ccompares.safe_push + (std::make_pair (gimple_assign_rhs1 (vec_stmt), + STMT_VINFO_REDUC_IDX (cond_info) == 2)); + /* ??? We probably want to have REDUC_IDX on the SLP node? + We have both three and four children COND_EXPR nodes + dependent on whether the comparison is still embedded + as GENERIC. So work backwards. */ + slp_reduc_idx = (SLP_TREE_CHILDREN (cond_node).length () - 3 + + STMT_VINFO_REDUC_IDX (cond_info)); } + else + slp_reduc_idx = STMT_VINFO_REDUC_IDX (cond_info); + cond_node = SLP_TREE_CHILDREN (cond_node)[slp_reduc_idx]; } gcc_assert (ccompares.length () != 0); @@ -6123,7 +6064,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, /* Create an induction variable. 
*/ gimple_stmt_iterator incr_gsi; bool insert_after; - vect_iv_increment_position (loop_exit, &incr_gsi, &insert_after); + vect_iv_increment_position (LOOP_VINFO_IV_EXIT (loop_vinfo), + &incr_gsi, &insert_after); create_iv (series_vect, PLUS_EXPR, vec_step, NULL_TREE, loop, &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr); @@ -6205,30 +6147,22 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, /* We need to reduce values in all exits. */ exit_bb = loop_exit->dest; exit_gsi = gsi_after_labels (exit_bb); - reduc_inputs.create (slp_node ? vec_num : ncopies); + reduc_inputs.create (vec_num); for (unsigned i = 0; i < vec_num; i++) { gimple_seq stmts = NULL; - if (slp_node) - def = vect_get_slp_vect_def (slp_node, i); + def = vect_get_slp_vect_def (slp_node, i); + tree new_def = copy_ssa_name (def); + phi = create_phi_node (new_def, exit_bb); + if (LOOP_VINFO_IV_EXIT (loop_vinfo) == loop_exit) + SET_PHI_ARG_DEF (phi, loop_exit->dest_idx, def); else - def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[0]); - for (j = 0; j < ncopies; j++) - { - tree new_def = copy_ssa_name (def); - phi = create_phi_node (new_def, exit_bb); - if (j) - def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]); - if (LOOP_VINFO_IV_EXIT (loop_vinfo) == loop_exit) - SET_PHI_ARG_DEF (phi, loop_exit->dest_idx, def); - else - { - for (unsigned k = 0; k < gimple_phi_num_args (phi); k++) - SET_PHI_ARG_DEF (phi, k, def); - } - new_def = gimple_convert (&stmts, vectype, new_def); - reduc_inputs.quick_push (new_def); + { + for (unsigned k = 0; k < gimple_phi_num_args (phi); k++) + SET_PHI_ARG_DEF (phi, k, def); } + new_def = gimple_convert (&stmts, vectype, new_def); + reduc_inputs.quick_push (new_def); gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); } @@ -6284,8 +6218,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, if (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info)) || direct_slp_reduc || (slp_reduc - && constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype), group_size)) - || ncopies > 1) + && constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype), group_size))) { gimple_seq stmts = NULL; tree single_input = reduc_inputs[0]; @@ -6579,7 +6512,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, is itself a neutral value. 
*/ tree vector_identity = NULL_TREE; tree neutral_op = NULL_TREE; - if (slp_node) + if (1) { tree initial_value = NULL_TREE; if (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (reduc_info))) @@ -7088,10 +7021,10 @@ static bool vectorize_fold_left_reduction (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, - gimple **vec_stmt, slp_tree slp_node, + slp_tree slp_node, gimple *reduc_def_stmt, code_helper code, internal_fn reduc_fn, - tree *ops, int num_ops, tree vectype_in, + int num_ops, tree vectype_in, int reduc_index, vec_loop_masks *masks, vec_loop_lens *lens) { @@ -7099,14 +7032,7 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo, tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); internal_fn mask_reduc_fn = get_masked_reduction_fn (reduc_fn, vectype_in); - int ncopies; - if (slp_node) - ncopies = 1; - else - ncopies = vect_get_num_copies (loop_vinfo, vectype_in); - gcc_assert (!nested_in_vect_loop_p (loop, stmt_info)); - gcc_assert (ncopies == 1); bool is_cond_op = false; if (!code.is_tree_code ()) @@ -7118,9 +7044,8 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo, gcc_assert (TREE_CODE_LENGTH (tree_code (code)) == binary_op); - if (slp_node) - gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out), - TYPE_VECTOR_SUBPARTS (vectype_in))); + gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out), + TYPE_VECTOR_SUBPARTS (vectype_in))); /* The operands either come from a binary operation or an IFN_COND operation. The former is a gimple assign with binary rhs and the latter is a @@ -7130,36 +7055,14 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo, int group_size = 1; stmt_vec_info scalar_dest_def_info; auto_vec<tree> vec_oprnds0, vec_opmask; - if (slp_node) - { - vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[(is_cond_op ? 2 : 0) - + (1 - reduc_index)], - &vec_oprnds0); - group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); - scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]; - /* For an IFN_COND_OP we also need the vector mask operand. */ - if (is_cond_op) - vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], &vec_opmask); - } - else - { - tree op0, opmask; - if (!is_cond_op) - op0 = ops[1 - reduc_index]; - else - { - op0 = ops[2 + (1 - reduc_index)]; - opmask = ops[0]; - } - vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1, - op0, &vec_oprnds0); - scalar_dest_def_info = stmt_info; - - /* For an IFN_COND_OP we also need the vector mask operand. */ - if (is_cond_op) - vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1, - opmask, &vec_opmask); - } + vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[(is_cond_op ? 2 : 0) + + (1 - reduc_index)], + &vec_oprnds0); + group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); + scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]; + /* For an IFN_COND_OP we also need the vector mask operand. 
*/ + if (is_cond_op) + vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], &vec_opmask); gimple *sdef = vect_orig_stmt (scalar_dest_def_info)->stmt; tree scalar_dest = gimple_get_lhs (sdef); @@ -7167,7 +7070,6 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo, tree reduc_var = gimple_phi_result (reduc_def_stmt); int vec_num = vec_oprnds0.length (); - gcc_assert (vec_num == 1 || slp_node); tree vec_elem_type = TREE_TYPE (vectype_out); gcc_checking_assert (useless_type_conversion_p (scalar_type, vec_elem_type)); @@ -7279,13 +7181,7 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo, scalar_dest_def_info, new_stmt, gsi); - if (slp_node) - slp_node->push_vec_def (new_stmt); - else - { - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); - *vec_stmt = new_stmt; - } + slp_node->push_vec_def (new_stmt); } return true; @@ -7543,7 +7439,7 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, return false; } - if (slp_node && !vect_maybe_update_slp_op_vectype (slp_op, vectype)) + if (!vect_maybe_update_slp_op_vectype (slp_op, vectype)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -7682,23 +7578,20 @@ vectorizable_reduction (loop_vec_info loop_vinfo, { if (is_a <gphi *> (stmt_info->stmt)) { - if (slp_node) - { - /* We eventually need to set a vector type on invariant - arguments. */ - unsigned j; - slp_tree child; - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) - if (!vect_maybe_update_slp_op_vectype - (child, SLP_TREE_VECTYPE (slp_node))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "incompatible vector types for " - "invariants\n"); - return false; - } - } + /* We eventually need to set a vector type on invariant + arguments. */ + unsigned j; + slp_tree child; + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) + if (!vect_maybe_update_slp_op_vectype + (child, SLP_TREE_VECTYPE (slp_node))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for " + "invariants\n"); + return false; + } /* Analysis for double-reduction is done on the outer loop PHI, nested cycles have no further restrictions. */ STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type; @@ -7723,7 +7616,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo, the outer loop PHI. The latter is what we want to analyze the reduction with. The LC PHI is handled by vectorizable_lc_phi. */ - gcc_assert (slp_node); return gimple_phi_num_args (as_a <gphi *> (stmt_info->stmt)) == 2; } use_operand_p use_p; @@ -7734,13 +7626,10 @@ vectorizable_reduction (loop_vec_info loop_vinfo, phi_info = loop_vinfo->lookup_stmt (use_stmt); } - if (slp_node) - { - slp_node_instance->reduc_phis = slp_node; - /* ??? We're leaving slp_node to point to the PHIs, we only - need it to get at the number of vector stmts which wasn't - yet initialized for the instance root. */ - } + slp_node_instance->reduc_phis = slp_node; + /* ??? We're leaving slp_node to point to the PHIs, we only + need it to get at the number of vector stmts which wasn't + yet initialized for the instance root. */ /* PHIs should not participate in patterns. */ gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); @@ -7756,11 +7645,10 @@ vectorizable_reduction (loop_vec_info loop_vinfo, unsigned reduc_chain_length = 0; bool only_slp_reduc_chain = true; stmt_info = NULL; - slp_tree slp_for_stmt_info = slp_node ? 
slp_node_instance->root : NULL; + slp_tree slp_for_stmt_info = slp_node_instance->root; /* For double-reductions we start SLP analysis at the inner loop LC PHI which is the def of the outer loop live stmt. */ - if (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def - && slp_node) + if (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def) slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0]; while (reduc_def != PHI_RESULT (reduc_def_phi)) { @@ -7863,7 +7751,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, reduc_def = op.ops[reduc_idx]; reduc_chain_length++; - if (!stmt_info && slp_node) + if (!stmt_info) slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0]; } /* PHIs should not participate in patterns. */ @@ -7877,14 +7765,13 @@ vectorizable_reduction (loop_vec_info loop_vinfo, /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last element. */ - if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) + if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) { gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info)); stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info); } if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) - gcc_assert (slp_node - && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info); + gcc_assert (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info); /* 1. Is vectorizable reduction? */ /* Not supportable if the reduction variable is used in the loop, unless @@ -7939,7 +7826,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo, OK to use them in a reduction chain or when the reduction group has just one element. */ if (lane_reducing - && slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) && SLP_TREE_LANES (slp_node) > 1) { @@ -8017,7 +7903,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, /* If we have a condition reduction, see if we can simplify it further. */ if (reduction_type == COND_REDUCTION) { - if (slp_node && SLP_TREE_LANES (slp_node) != 1) + if (SLP_TREE_LANES (slp_node) != 1) return false; /* When the condition uses the reduction value in the condition, fail. */ @@ -8119,10 +8005,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, if (STMT_VINFO_LIVE_P (phi_info)) return false; - if (slp_node) - ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - else - ncopies = vect_get_num_copies (loop_vinfo, vectype_in); + ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); gcc_assert (ncopies >= 1); @@ -8194,8 +8077,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, outer-loop vectorization is safe. Likewise when we are vectorizing a series of reductions using SLP and the VF is one the reductions are performed in scalar order. */ - if (slp_node - && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) + if (!REDUC_GROUP_FIRST_ELEMENT (stmt_info) && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1u)) ; else if (needs_fold_left_reduction_p (op.type, orig_code)) @@ -8210,6 +8092,19 @@ vectorizable_reduction (loop_vec_info loop_vinfo, "in-order reduction chain without SLP.\n"); return false; } + /* Code generation doesn't support function calls other + than .COND_*. 
*/ + if (!op.code.is_tree_code () + && !(op.code.is_internal_fn () + && conditional_internal_fn_code (internal_fn (op.code)) + != ERROR_MARK)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "in-order reduction chain operation not " + "supported.\n"); + return false; + } STMT_VINFO_REDUC_TYPE (reduc_info) = reduction_type = FOLD_LEFT_REDUCTION; } @@ -8227,7 +8122,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, || reduction_type == INTEGER_INDUC_COND_REDUCTION || reduction_type == CONST_COND_REDUCTION || reduction_type == EXTRACT_LAST_REDUCTION) - && slp_node + && 1 && ncopies > 1) { if (dump_enabled_p ()) @@ -8236,17 +8131,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo, return false; } - if ((double_reduc || reduction_type != TREE_CODE_REDUCTION) - && !slp_node - && ncopies > 1) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "multiple types in double reduction or condition " - "reduction or fold-left reduction.\n"); - return false; - } - internal_fn reduc_fn = IFN_LAST; if (reduction_type == TREE_CODE_REDUCTION || reduction_type == FOLD_LEFT_REDUCTION @@ -8308,14 +8192,11 @@ vectorizable_reduction (loop_vec_info loop_vinfo, /* For SLP reductions, see if there is a neutral value we can use. */ tree neutral_op = NULL_TREE; - if (slp_node) - { - tree initial_value = NULL_TREE; - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL) - initial_value = vect_phi_initial_value (reduc_def_phi); - neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype_out), - orig_code, initial_value); - } + tree initial_value = NULL_TREE; + if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL) + initial_value = vect_phi_initial_value (reduc_def_phi); + neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype_out), + orig_code, initial_value); if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION) { @@ -8341,7 +8222,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, } if (reduction_type == FOLD_LEFT_REDUCTION - && (slp_node && SLP_TREE_LANES (slp_node) > 1) + && SLP_TREE_LANES (slp_node) > 1 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)) { /* We cannot use in-order reductions in this case because there is @@ -8370,8 +8251,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, } /* Check extra constraints for variable-length unchained SLP reductions. */ - if (slp_node - && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) + if (!REDUC_GROUP_FIRST_ELEMENT (stmt_info) && !nunits_out.is_constant ()) { /* We checked above that we could build the initial vector when @@ -8465,9 +8345,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, own reduction accumulator since one of the main goals of unrolling a reduction is to reduce the aggregate loop-carried latency. */ if (ncopies > 1 - && (!slp_node - || (!REDUC_GROUP_FIRST_ELEMENT (stmt_info) - && SLP_TREE_LANES (slp_node) == 1)) + && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) + && SLP_TREE_LANES (slp_node) == 1 && (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live) && reduc_chain_length == 1 && loop_vinfo->suggested_unroll_factor == 1) @@ -8518,8 +8397,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, reduction. 
*/ single_defuse_cycle &= !lane_reducing; - if (slp_node - && (single_defuse_cycle || reduction_type == FOLD_LEFT_REDUCTION)) + if (single_defuse_cycle || reduction_type == FOLD_LEFT_REDUCTION) for (i = 0; i < (int) op.num_ops; i++) if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_op[i])) { @@ -8549,7 +8427,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, { stmt_vec_info tem = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info)); - if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem)) + if (REDUC_GROUP_FIRST_ELEMENT (tem)) { gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem)); tem = REDUC_GROUP_FIRST_ELEMENT (tem); @@ -8645,11 +8523,10 @@ vect_emulate_mixed_dot_prod (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, bool vect_transform_reduction (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, - gimple **vec_stmt, slp_tree slp_node) + slp_tree slp_node) { tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - unsigned ncopies; unsigned vec_num; stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); @@ -8677,16 +8554,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo, if (!vectype_in) vectype_in = STMT_VINFO_VECTYPE (stmt_info); - if (slp_node) - { - ncopies = 1; - vec_num = vect_get_num_copies (loop_vinfo, slp_node, vectype_in); - } - else - { - ncopies = vect_get_num_copies (loop_vinfo, vectype_in); - vec_num = 1; - } + vec_num = vect_get_num_copies (loop_vinfo, slp_node, vectype_in); code_helper code = canonicalize_code (op.code, op.type); internal_fn cond_fn = get_conditional_internal_fn (code, op.type); @@ -8702,10 +8570,6 @@ vect_transform_reduction (loop_vec_info loop_vinfo, if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n"); - /* FORNOW: Multiple types are not supported for condition. */ - if (code == COND_EXPR) - gcc_assert (ncopies == 1); - /* A binary COND_OP reduction must have the same definition and else value. */ bool cond_fn_p = code.is_internal_fn () @@ -8729,8 +8593,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo, internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); gcc_assert (code.is_tree_code () || cond_fn_p); return vectorize_fold_left_reduction - (loop_vinfo, stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, - code, reduc_fn, op.ops, op.num_ops, vectype_in, + (loop_vinfo, stmt_info, gsi, slp_node, reduc_def_phi, + code, reduc_fn, op.num_ops, vectype_in, reduc_index, masks, lens); } @@ -8748,33 +8612,12 @@ vect_transform_reduction (loop_vec_info loop_vinfo, tree scalar_dest = gimple_get_lhs (stmt_info->stmt); tree vec_dest = vect_create_destination_var (scalar_dest, vectype_out); - if (lane_reducing && !slp_node && !single_defuse_cycle) - { - /* Note: there are still vectorizable cases that can not be handled by - single-lane slp. Probably it would take some time to evolve the - feature to a mature state. So we have to keep the below non-slp code - path as failsafe for lane-reducing support. */ - gcc_assert (op.num_ops <= 3); - for (unsigned i = 0; i < op.num_ops; i++) - { - unsigned oprnd_ncopies = ncopies; - - if ((int) i == reduc_index) - { - tree vectype = STMT_VINFO_VECTYPE (stmt_info); - oprnd_ncopies = vect_get_num_copies (loop_vinfo, vectype); - } - - vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, oprnd_ncopies, - op.ops[i], &vec_oprnds[i]); - } - } /* Get NCOPIES vector definitions for all operands except the reduction definition. 
*/ - else if (!cond_fn_p) + if (!cond_fn_p) { gcc_assert (reduc_index >= 0 && reduc_index <= 2); - vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies, + vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, 1, single_defuse_cycle && reduc_index == 0 ? NULL_TREE : op.ops[0], &vec_oprnds[0], single_defuse_cycle && reduc_index == 1 @@ -8789,7 +8632,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo, vectype. */ gcc_assert (single_defuse_cycle && (reduc_index == 1 || reduc_index == 2)); - vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies, op.ops[0], + vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, 1, op.ops[0], truth_type_for (vectype_in), &vec_oprnds[0], reduc_index == 1 ? NULL_TREE : op.ops[1], NULL_TREE, &vec_oprnds[1], @@ -8945,7 +8788,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo, handle over the operand to other dependent statements. */ gcc_assert (reduc_vop); - if (slp_node && TREE_CODE (reduc_vop) == SSA_NAME + if (TREE_CODE (reduc_vop) == SSA_NAME && !SSA_NAME_IS_DEFAULT_DEF (reduc_vop)) new_stmt = SSA_NAME_DEF_STMT (reduc_vop); else @@ -8969,7 +8812,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo, std::swap (vop[0], vop[1]); } tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, - vec_num * ncopies, vectype_in, + vec_num, vectype_in, mask_index++); gcall *call = gimple_build_call_internal (cond_fn, 4, mask, vop[0], vop[1], vop[0]); @@ -8987,7 +8830,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo, if (masked_loop_p && mask_by_cond_expr) { tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, - vec_num * ncopies, vectype_in, + vec_num, vectype_in, mask_index++); build_vect_cond_expr (code, vop, mask, gsi); } @@ -9015,15 +8858,10 @@ vect_transform_reduction (loop_vec_info loop_vinfo, if (single_defuse_cycle && i < num - 1) vec_oprnds[reduc_index].safe_push (gimple_get_lhs (new_stmt)); - else if (slp_node) - slp_node->push_vec_def (new_stmt); else - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); + slp_node->push_vec_def (new_stmt); } - if (!slp_node) - *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; - return true; } @@ -9031,14 +8869,12 @@ vect_transform_reduction (loop_vec_info loop_vinfo, bool vect_transform_cycle_phi (loop_vec_info loop_vinfo, - stmt_vec_info stmt_info, gimple **vec_stmt, + stmt_vec_info stmt_info, slp_tree slp_node, slp_instance slp_node_instance) { tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); int i; - int ncopies; - int j; bool nested_cycle = false; int vec_num; @@ -9058,25 +8894,12 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, /* Leave the scalar phi in place. */ return true; - if (slp_node) - { - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - ncopies = 1; - } - else - { - vec_num = 1; - ncopies = vect_get_num_copies (loop_vinfo, - STMT_VINFO_VECTYPE (stmt_info)); - } + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); /* Check whether we should use a single PHI node and accumulate vectors to one before the backedge. */ if (STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info)) - { - ncopies = 1; - vec_num = 1; - } + vec_num = 1; /* Create the destination vector */ gphi *phi = as_a <gphi *> (stmt_info->stmt); @@ -9086,158 +8909,86 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, /* Get the loop-entry arguments. 
*/ tree vec_initial_def = NULL_TREE; auto_vec<tree> vec_initial_defs; - if (slp_node) + vec_initial_defs.reserve (vec_num); + /* Optimize: if initial_def is for REDUC_MAX smaller than the base + and we can't use zero for induc_val, use initial_def. Similarly + for REDUC_MIN and initial_def larger than the base. */ + if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) { - vec_initial_defs.reserve (vec_num); - /* Optimize: if initial_def is for REDUC_MAX smaller than the base - and we can't use zero for induc_val, use initial_def. Similarly - for REDUC_MIN and initial_def larger than the base. */ - if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) - { - gcc_assert (SLP_TREE_LANES (slp_node) == 1); - tree initial_def = vect_phi_initial_value (phi); - reduc_info->reduc_initial_values.safe_push (initial_def); - tree induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info); - if (TREE_CODE (initial_def) == INTEGER_CST - && !integer_zerop (induc_val) - && ((STMT_VINFO_REDUC_CODE (reduc_info) == MAX_EXPR - && tree_int_cst_lt (initial_def, induc_val)) - || (STMT_VINFO_REDUC_CODE (reduc_info) == MIN_EXPR - && tree_int_cst_lt (induc_val, initial_def)))) - { - induc_val = initial_def; - /* Communicate we used the initial_def to epilouge - generation. */ - STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE; - } - vec_initial_defs.quick_push - (build_vector_from_val (vectype_out, induc_val)); - } - else if (nested_cycle) + gcc_assert (SLP_TREE_LANES (slp_node) == 1); + tree initial_def = vect_phi_initial_value (phi); + reduc_info->reduc_initial_values.safe_push (initial_def); + tree induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info); + if (TREE_CODE (initial_def) == INTEGER_CST + && !integer_zerop (induc_val) + && ((STMT_VINFO_REDUC_CODE (reduc_info) == MAX_EXPR + && tree_int_cst_lt (initial_def, induc_val)) + || (STMT_VINFO_REDUC_CODE (reduc_info) == MIN_EXPR + && tree_int_cst_lt (induc_val, initial_def)))) { - unsigned phi_idx = loop_preheader_edge (loop)->dest_idx; - vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[phi_idx], - &vec_initial_defs); - } - else - { - gcc_assert (slp_node == slp_node_instance->reduc_phis); - vec<tree> &initial_values = reduc_info->reduc_initial_values; - vec<stmt_vec_info> &stmts = SLP_TREE_SCALAR_STMTS (slp_node); - - unsigned int num_phis = stmts.length (); - if (REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info)) - num_phis = 1; - initial_values.reserve (num_phis); - for (unsigned int i = 0; i < num_phis; ++i) - { - gphi *this_phi = as_a<gphi *> (stmts[i]->stmt); - initial_values.quick_push (vect_phi_initial_value (this_phi)); - } - if (vec_num == 1) - vect_find_reusable_accumulator (loop_vinfo, reduc_info); - if (!initial_values.is_empty ()) - { - tree initial_value - = (num_phis == 1 ? initial_values[0] : NULL_TREE); - code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); - tree neutral_op - = neutral_op_for_reduction (TREE_TYPE (vectype_out), - code, initial_value); - /* Try to simplify the vector initialization by applying an - adjustment after the reduction has been performed. This - can also break a critical path but on the other hand - requires to keep the initial value live across the loop. 
*/ - if (neutral_op - && initial_values.length () == 1 - && !reduc_info->reused_accumulator - && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def - && !operand_equal_p (neutral_op, initial_values[0])) - { - STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) - = initial_values[0]; - initial_values[0] = neutral_op; - } - get_initial_defs_for_reduction (loop_vinfo, reduc_info, - &vec_initial_defs, vec_num, - stmts.length (), neutral_op); - } + induc_val = initial_def; + /* Communicate we used the initial_def to epilouge + generation. */ + STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE; } + vec_initial_defs.quick_push + (build_vector_from_val (vectype_out, induc_val)); + } + else if (nested_cycle) + { + unsigned phi_idx = loop_preheader_edge (loop)->dest_idx; + vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[phi_idx], + &vec_initial_defs); } else { - /* Get at the scalar def before the loop, that defines the initial - value of the reduction variable. */ - tree initial_def = vect_phi_initial_value (phi); - reduc_info->reduc_initial_values.safe_push (initial_def); - /* Optimize: if initial_def is for REDUC_MAX smaller than the base - and we can't use zero for induc_val, use initial_def. Similarly - for REDUC_MIN and initial_def larger than the base. */ - if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) - { - tree induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info); - if (TREE_CODE (initial_def) == INTEGER_CST - && !integer_zerop (induc_val) - && ((STMT_VINFO_REDUC_CODE (reduc_info) == MAX_EXPR - && tree_int_cst_lt (initial_def, induc_val)) - || (STMT_VINFO_REDUC_CODE (reduc_info) == MIN_EXPR - && tree_int_cst_lt (induc_val, initial_def)))) + gcc_assert (slp_node == slp_node_instance->reduc_phis); + vec<tree> &initial_values = reduc_info->reduc_initial_values; + vec<stmt_vec_info> &stmts = SLP_TREE_SCALAR_STMTS (slp_node); + + unsigned int num_phis = stmts.length (); + if (REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info)) + num_phis = 1; + initial_values.reserve (num_phis); + for (unsigned int i = 0; i < num_phis; ++i) + { + gphi *this_phi = as_a<gphi *> (stmts[i]->stmt); + initial_values.quick_push (vect_phi_initial_value (this_phi)); + } + if (vec_num == 1) + vect_find_reusable_accumulator (loop_vinfo, reduc_info); + if (!initial_values.is_empty ()) + { + tree initial_value + = (num_phis == 1 ? initial_values[0] : NULL_TREE); + code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); + tree neutral_op + = neutral_op_for_reduction (TREE_TYPE (vectype_out), + code, initial_value); + /* Try to simplify the vector initialization by applying an + adjustment after the reduction has been performed. This + can also break a critical path but on the other hand + requires to keep the initial value live across the loop. */ + if (neutral_op + && initial_values.length () == 1 + && !reduc_info->reused_accumulator + && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def + && !operand_equal_p (neutral_op, initial_values[0])) { - induc_val = initial_def; - /* Communicate we used the initial_def to epilouge - generation. */ - STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE; - } - vec_initial_def = build_vector_from_val (vectype_out, induc_val); - } - else if (nested_cycle) - { - /* Do not use an adjustment def as that case is not supported - correctly if ncopies is not one. 
*/ - vect_get_vec_defs_for_operand (loop_vinfo, reduc_stmt_info, - ncopies, initial_def, - &vec_initial_defs); - } - else if (STMT_VINFO_REDUC_TYPE (reduc_info) == CONST_COND_REDUCTION - || STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION) - /* Fill the initial vector with the initial scalar value. */ - vec_initial_def - = get_initial_def_for_reduction (loop_vinfo, reduc_stmt_info, - initial_def, initial_def); - else - { - if (ncopies == 1) - vect_find_reusable_accumulator (loop_vinfo, reduc_info); - if (!reduc_info->reduc_initial_values.is_empty ()) - { - initial_def = reduc_info->reduc_initial_values[0]; - code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); - tree neutral_op - = neutral_op_for_reduction (TREE_TYPE (initial_def), - code, initial_def); - gcc_assert (neutral_op); - /* Try to simplify the vector initialization by applying an - adjustment after the reduction has been performed. */ - if (!reduc_info->reused_accumulator - && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def - && !operand_equal_p (neutral_op, initial_def)) - { - STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) - = initial_def; - initial_def = neutral_op; - } - vec_initial_def - = get_initial_def_for_reduction (loop_vinfo, reduc_info, - initial_def, neutral_op); + STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) + = initial_values[0]; + initial_values[0] = neutral_op; } + get_initial_defs_for_reduction (loop_vinfo, reduc_info, + &vec_initial_defs, vec_num, + stmts.length (), neutral_op); } } if (vec_initial_def) { - vec_initial_defs.create (ncopies); - for (i = 0; i < ncopies; ++i) - vec_initial_defs.quick_push (vec_initial_def); + vec_initial_defs.create (1); + vec_initial_defs.quick_push (vec_initial_def); } if (auto *accumulator = reduc_info->reused_accumulator) @@ -9312,29 +9063,15 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, for (i = 0; i < vec_num; i++) { tree vec_init_def = vec_initial_defs[i]; - for (j = 0; j < ncopies; j++) - { - /* Create the reduction-phi that defines the reduction - operand. */ - gphi *new_phi = create_phi_node (vec_dest, loop->header); - - /* Set the loop-entry arg of the reduction-phi. */ - if (j != 0 && nested_cycle) - vec_init_def = vec_initial_defs[j]; - add_phi_arg (new_phi, vec_init_def, loop_preheader_edge (loop), - UNKNOWN_LOCATION); + /* Create the reduction-phi that defines the reduction + operand. */ + gphi *new_phi = create_phi_node (vec_dest, loop->header); + add_phi_arg (new_phi, vec_init_def, loop_preheader_edge (loop), + UNKNOWN_LOCATION); - /* The loop-latch arg is set in epilogue processing. */ + /* The loop-latch arg is set in epilogue processing. */ - if (slp_node) - slp_node->push_vec_def (new_phi); - else - { - if (j == 0) - *vec_stmt = new_phi; - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_phi); - } - } + slp_node->push_vec_def (new_phi); } return true; @@ -9344,7 +9081,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, bool vectorizable_lc_phi (loop_vec_info loop_vinfo, - stmt_vec_info stmt_info, gimple **vec_stmt, + stmt_vec_info stmt_info, slp_tree slp_node) { if (!loop_vinfo @@ -9356,22 +9093,25 @@ vectorizable_lc_phi (loop_vec_info loop_vinfo, && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def) return false; - if (!vec_stmt) /* transformation not required. */ + /* Deal with copies from externs or constants that disguise as + loop-closed PHI nodes (PR97886). 
*/ + if (!vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0], + SLP_TREE_VECTYPE (slp_node))) { - /* Deal with copies from externs or constants that disguise as - loop-closed PHI nodes (PR97886). */ - if (slp_node - && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0], - SLP_TREE_VECTYPE (slp_node))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "incompatible vector types for invariants\n"); - return false; - } - STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type; - return true; + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for invariants\n"); + return false; } + STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type; + return true; +} + +bool +vect_transform_lc_phi (loop_vec_info loop_vinfo, + stmt_vec_info stmt_info, + slp_tree slp_node) +{ tree vectype = STMT_VINFO_VECTYPE (stmt_info); tree scalar_dest = gimple_phi_result (stmt_info->stmt); @@ -9379,21 +9119,15 @@ vectorizable_lc_phi (loop_vec_info loop_vinfo, edge e = single_pred_edge (bb); tree vec_dest = vect_create_destination_var (scalar_dest, vectype); auto_vec<tree> vec_oprnds; - vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, - !slp_node ? vect_get_num_copies (loop_vinfo, vectype) : 1, + vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, 1, gimple_phi_arg_def (stmt_info->stmt, 0), &vec_oprnds); for (unsigned i = 0; i < vec_oprnds.length (); i++) { /* Create the vectorized LC PHI node. */ gphi *new_phi = create_phi_node (vec_dest, bb); add_phi_arg (new_phi, vec_oprnds[i], e, UNKNOWN_LOCATION); - if (slp_node) - slp_node->push_vec_def (new_phi); - else - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_phi); + slp_node->push_vec_def (new_phi); } - if (!slp_node) - *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; return true; } @@ -10026,7 +9760,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, gphi *phi = dyn_cast <gphi *> (stmt_info->stmt); - tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree vectype = SLP_TREE_VECTYPE (slp_node); poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); enum vect_induction_op_type induction_type = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info); @@ -10051,7 +9785,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, /* TODO: Support multi-lane SLP for nonlinear iv. There should be separate vector iv update for each iv and a permutation to generate wanted vector iv. 
*/ - if (slp_node && SLP_TREE_LANES (slp_node) > 1) + if (SLP_TREE_LANES (slp_node) > 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -10262,13 +9996,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), UNKNOWN_LOCATION); - if (slp_node) - slp_node->push_vec_def (induction_phi); - else - { - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi); - *vec_stmt = induction_phi; - } + slp_node->push_vec_def (induction_phi); /* In case that vectorization factor (VF) is bigger than the number of elements that we can fit in a vectype (nunits), we have to generate @@ -10298,10 +10026,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, induction_type); gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); new_stmt = SSA_NAME_DEF_STMT (vec_def); - if (slp_node) - slp_node->push_vec_def (new_stmt); - else - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); + slp_node->push_vec_def (new_stmt); } } @@ -10327,15 +10052,13 @@ vectorizable_induction (loop_vec_info loop_vinfo, stmt_vector_for_cost *cost_vec) { class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - unsigned ncopies; bool nested_in_vect_loop = false; class loop *iv_loop; tree vec_def; edge pe = loop_preheader_edge (loop); basic_block new_bb; - tree new_vec, vec_init = NULL_TREE, vec_step, t; + tree vec_init = NULL_TREE, vec_step, t; tree new_name; - gimple *new_stmt; gphi *induction_phi; tree induc_def, vec_dest; poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); @@ -10362,15 +10085,9 @@ vectorizable_induction (loop_vec_info loop_vinfo, return vectorizable_nonlinear_induction (loop_vinfo, stmt_info, vec_stmt, slp_node, cost_vec); - tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree vectype = SLP_TREE_VECTYPE (slp_node); poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); - if (slp_node) - ncopies = 1; - else - ncopies = vect_get_num_copies (loop_vinfo, vectype); - gcc_assert (ncopies >= 1); - /* FORNOW. These restrictions should be relaxed. */ if (nested_in_vect_loop_p (loop, stmt_info)) { @@ -10380,14 +10097,6 @@ vectorizable_induction (loop_vec_info loop_vinfo, edge latch_e; tree loop_arg; - if (ncopies > 1) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "multiple types in nested loop.\n"); - return false; - } - exit_phi = NULL; latch_e = loop_latch_edge (loop->inner); loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); @@ -10424,7 +10133,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, iv_loop = loop; gcc_assert (iv_loop == (gimple_bb (phi))->loop_father); - if (slp_node && (!nunits.is_constant () && SLP_TREE_LANES (slp_node) != 1)) + if (!nunits.is_constant () && SLP_TREE_LANES (slp_node) != 1) { /* The current SLP code creates the step value element-by-element. */ if (dump_enabled_p ()) @@ -10480,41 +10189,28 @@ vectorizable_induction (loop_vec_info loop_vinfo, if (!vec_stmt) /* transformation not required. */ { unsigned inside_cost = 0, prologue_cost = 0; - if (slp_node) - { - /* We eventually need to set a vector type on invariant - arguments. */ - unsigned j; - slp_tree child; - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) - if (!vect_maybe_update_slp_op_vectype - (child, SLP_TREE_VECTYPE (slp_node))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "incompatible vector types for " - "invariants\n"); - return false; - } - /* loop cost for vec_loop. 
*/ - inside_cost - = record_stmt_cost (cost_vec, - SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), - vector_stmt, stmt_info, 0, vect_body); - /* prologue cost for vec_init (if not nested) and step. */ - prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop, - scalar_to_vec, - stmt_info, 0, vect_prologue); - } - else /* if (!slp_node) */ - { - /* loop cost for vec_loop. */ - inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt, - stmt_info, 0, vect_body); - /* prologue cost for vec_init and vec_step. */ - prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec, - stmt_info, 0, vect_prologue); - } + /* We eventually need to set a vector type on invariant + arguments. */ + unsigned j; + slp_tree child; + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) + if (!vect_maybe_update_slp_op_vectype + (child, SLP_TREE_VECTYPE (slp_node))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for " + "invariants\n"); + return false; + } + /* loop cost for vec_loop. */ + inside_cost = record_stmt_cost (cost_vec, + SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), + vector_stmt, stmt_info, 0, vect_body); + /* prologue cost for vec_init (if not nested) and step. */ + prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop, + scalar_to_vec, + stmt_info, 0, vect_prologue); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vect_model_induction_cost: inside_cost = %d, " @@ -10545,670 +10241,374 @@ vectorizable_induction (loop_vec_info loop_vinfo, with group size 3 we need [i0, i1, i2, i0 + S0] [i1 + S1, i2 + S2, i0 + 2*S0, i1 + 2*S1] [i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */ - if (slp_node) + gimple_stmt_iterator incr_si; + bool insert_after; + standard_iv_increment_position (iv_loop, &incr_si, &insert_after); + + /* The initial values are vectorized, but any lanes > group_size + need adjustment. */ + slp_tree init_node + = SLP_TREE_CHILDREN (slp_node)[pe->dest_idx]; + + /* Gather steps. Since we do not vectorize inductions as + cycles we have to reconstruct the step from SCEV data. */ + unsigned group_size = SLP_TREE_LANES (slp_node); + tree *steps = XALLOCAVEC (tree, group_size); + tree *inits = XALLOCAVEC (tree, group_size); + stmt_vec_info phi_info; + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info) + { + steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info); + if (!init_node) + inits[i] = gimple_phi_arg_def (as_a<gphi *> (phi_info->stmt), + pe->dest_idx); + } + + /* Now generate the IVs. */ + unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + gcc_assert (multiple_p (nunits * nvects, group_size)); + unsigned nivs; + unsigned HOST_WIDE_INT const_nunits; + if (nested_in_vect_loop) + nivs = nvects; + else if (nunits.is_constant (&const_nunits)) { - gimple_stmt_iterator incr_si; - bool insert_after; - standard_iv_increment_position (iv_loop, &incr_si, &insert_after); - - /* The initial values are vectorized, but any lanes > group_size - need adjustment. */ - slp_tree init_node - = SLP_TREE_CHILDREN (slp_node)[pe->dest_idx]; - - /* Gather steps. Since we do not vectorize inductions as - cycles we have to reconstruct the step from SCEV data. 
*/ - unsigned group_size = SLP_TREE_LANES (slp_node); - tree *steps = XALLOCAVEC (tree, group_size); - tree *inits = XALLOCAVEC (tree, group_size); - stmt_vec_info phi_info; - FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info) - { - steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info); - if (!init_node) - inits[i] = gimple_phi_arg_def (as_a<gphi *> (phi_info->stmt), - pe->dest_idx); - } - - /* Now generate the IVs. */ - unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - gcc_assert (multiple_p (nunits * nvects, group_size)); - unsigned nivs; - unsigned HOST_WIDE_INT const_nunits; - if (nested_in_vect_loop) - nivs = nvects; - else if (nunits.is_constant (&const_nunits)) - { - /* Compute the number of distinct IVs we need. First reduce - group_size if it is a multiple of const_nunits so we get - one IV for a group_size of 4 but const_nunits 2. */ - unsigned group_sizep = group_size; - if (group_sizep % const_nunits == 0) - group_sizep = group_sizep / const_nunits; - nivs = least_common_multiple (group_sizep, - const_nunits) / const_nunits; - } - else - { - gcc_assert (SLP_TREE_LANES (slp_node) == 1); - nivs = 1; - } - gimple_seq init_stmts = NULL; - tree lupdate_mul = NULL_TREE; - if (!nested_in_vect_loop) + /* Compute the number of distinct IVs we need. First reduce + group_size if it is a multiple of const_nunits so we get + one IV for a group_size of 4 but const_nunits 2. */ + unsigned group_sizep = group_size; + if (group_sizep % const_nunits == 0) + group_sizep = group_sizep / const_nunits; + nivs = least_common_multiple (group_sizep, const_nunits) / const_nunits; + } + else + { + gcc_assert (SLP_TREE_LANES (slp_node) == 1); + nivs = 1; + } + gimple_seq init_stmts = NULL; + tree lupdate_mul = NULL_TREE; + if (!nested_in_vect_loop) + { + if (nunits.is_constant (&const_nunits)) { - if (nunits.is_constant (&const_nunits)) - { - /* The number of iterations covered in one vector iteration. */ - unsigned lup_mul = (nvects * const_nunits) / group_size; - lupdate_mul - = build_vector_from_val (step_vectype, - SCALAR_FLOAT_TYPE_P (stept) - ? build_real_from_wide (stept, lup_mul, - UNSIGNED) - : build_int_cstu (stept, lup_mul)); - } - else - { - if (SCALAR_FLOAT_TYPE_P (stept)) - { - tree tem = build_int_cst (integer_type_node, vf); - lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, - stept, tem); - } - else - lupdate_mul = build_int_cst (stept, vf); - lupdate_mul = gimple_build_vector_from_val (&init_stmts, - step_vectype, - lupdate_mul); - } + /* The number of iterations covered in one vector iteration. */ + unsigned lup_mul = (nvects * const_nunits) / group_size; + lupdate_mul + = build_vector_from_val (step_vectype, + SCALAR_FLOAT_TYPE_P (stept) + ? build_real_from_wide (stept, lup_mul, + UNSIGNED) + : build_int_cstu (stept, lup_mul)); } - tree peel_mul = NULL_TREE; - if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)) + else { if (SCALAR_FLOAT_TYPE_P (stept)) - peel_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, - LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); - else - peel_mul = gimple_convert (&init_stmts, stept, - LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); - peel_mul = gimple_build_vector_from_val (&init_stmts, - step_vectype, peel_mul); - - /* If early break then we have to create a new PHI which we can use as - an offset to adjust the induction reduction in early exits. - - This is because when peeling for alignment using masking, the first - few elements of the vector can be inactive. 
As such if we find the - entry in the first iteration we have adjust the starting point of - the scalar code. - - We do this by creating a new scalar PHI that keeps track of whether - we are the first iteration of the loop (with the additional masking) - or whether we have taken a loop iteration already. - - The generated sequence: - - pre-header: - bb1: - i_1 = <number of leading inactive elements> - - header: - bb2: - i_2 = PHI <i_1(bb1), 0(latch)> - … - - early-exit: - bb3: - i_3 = iv_step * i_2 + PHI<vector-iv> - - The first part of the adjustment to create i_1 and i_2 are done here - and the last part creating i_3 is done in - vectorizable_live_operations when the induction extraction is - materialized. */ - if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) - && !LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo)) { - auto skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); - tree ty_skip_niters = TREE_TYPE (skip_niters); - tree break_lhs_phi = vect_get_new_vect_var (ty_skip_niters, - vect_scalar_var, - "pfa_iv_offset"); - gphi *nphi = create_phi_node (break_lhs_phi, bb); - add_phi_arg (nphi, skip_niters, pe, UNKNOWN_LOCATION); - add_phi_arg (nphi, build_zero_cst (ty_skip_niters), - loop_latch_edge (iv_loop), UNKNOWN_LOCATION); - - LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo) - = PHI_RESULT (nphi); + tree tem = build_int_cst (integer_type_node, vf); + lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, tem); } + else + lupdate_mul = build_int_cst (stept, vf); + lupdate_mul = gimple_build_vector_from_val (&init_stmts, step_vectype, + lupdate_mul); } - tree step_mul = NULL_TREE; - unsigned ivn; - auto_vec<tree> vec_steps; - for (ivn = 0; ivn < nivs; ++ivn) + } + tree peel_mul = NULL_TREE; + if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)) + { + if (SCALAR_FLOAT_TYPE_P (stept)) + peel_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, + LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); + else + peel_mul = gimple_convert (&init_stmts, stept, + LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); + peel_mul = gimple_build_vector_from_val (&init_stmts, + step_vectype, peel_mul); + + /* If early break then we have to create a new PHI which we can use as + an offset to adjust the induction reduction in early exits. + + This is because when peeling for alignment using masking, the first + few elements of the vector can be inactive. As such if we find the + entry in the first iteration we have adjust the starting point of + the scalar code. + + We do this by creating a new scalar PHI that keeps track of whether + we are the first iteration of the loop (with the additional masking) + or whether we have taken a loop iteration already. + + The generated sequence: + + pre-header: + bb1: + i_1 = <number of leading inactive elements> + + header: + bb2: + i_2 = PHI <i_1(bb1), 0(latch)> + … + + early-exit: + bb3: + i_3 = iv_step * i_2 + PHI<vector-iv> + + The first part of the adjustment to create i_1 and i_2 are done here + and the last part creating i_3 is done in + vectorizable_live_operations when the induction extraction is + materialized. 
*/ + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) + && !LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo)) + { + auto skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); + tree ty_skip_niters = TREE_TYPE (skip_niters); + tree break_lhs_phi = vect_get_new_vect_var (ty_skip_niters, + vect_scalar_var, + "pfa_iv_offset"); + gphi *nphi = create_phi_node (break_lhs_phi, bb); + add_phi_arg (nphi, skip_niters, pe, UNKNOWN_LOCATION); + add_phi_arg (nphi, build_zero_cst (ty_skip_niters), + loop_latch_edge (iv_loop), UNKNOWN_LOCATION); + + LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo) = PHI_RESULT (nphi); + } + } + tree step_mul = NULL_TREE; + unsigned ivn; + auto_vec<tree> vec_steps; + for (ivn = 0; ivn < nivs; ++ivn) + { + gimple_seq stmts = NULL; + bool invariant = true; + if (nunits.is_constant (&const_nunits)) { - gimple_seq stmts = NULL; - bool invariant = true; - if (nunits.is_constant (&const_nunits)) + tree_vector_builder step_elts (step_vectype, const_nunits, 1); + tree_vector_builder init_elts (vectype, const_nunits, 1); + tree_vector_builder mul_elts (step_vectype, const_nunits, 1); + for (unsigned eltn = 0; eltn < const_nunits; ++eltn) { - tree_vector_builder step_elts (step_vectype, const_nunits, 1); - tree_vector_builder init_elts (vectype, const_nunits, 1); - tree_vector_builder mul_elts (step_vectype, const_nunits, 1); - for (unsigned eltn = 0; eltn < const_nunits; ++eltn) - { - /* The scalar steps of the IVs. */ - tree elt = steps[(ivn*const_nunits + eltn) % group_size]; - elt = gimple_convert (&init_stmts, - TREE_TYPE (step_vectype), elt); - step_elts.quick_push (elt); - if (!init_node) - { - /* The scalar inits of the IVs if not vectorized. */ - elt = inits[(ivn*const_nunits + eltn) % group_size]; - if (!useless_type_conversion_p (TREE_TYPE (vectype), - TREE_TYPE (elt))) - elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, - TREE_TYPE (vectype), elt); - init_elts.quick_push (elt); - } - /* The number of steps to add to the initial values. */ - unsigned mul_elt = (ivn*const_nunits + eltn) / group_size; - mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept) - ? build_real_from_wide (stept, mul_elt, - UNSIGNED) - : build_int_cstu (stept, mul_elt)); - } - vec_step = gimple_build_vector (&init_stmts, &step_elts); - step_mul = gimple_build_vector (&init_stmts, &mul_elts); + /* The scalar steps of the IVs. */ + tree elt = steps[(ivn*const_nunits + eltn) % group_size]; + elt = gimple_convert (&init_stmts, TREE_TYPE (step_vectype), elt); + step_elts.quick_push (elt); if (!init_node) - vec_init = gimple_build_vector (&init_stmts, &init_elts); - } - else - { - if (init_node) - ; - else if (INTEGRAL_TYPE_P (TREE_TYPE (steps[0]))) { - new_name = gimple_convert (&init_stmts, stept, inits[0]); - /* Build the initial value directly as a VEC_SERIES_EXPR. */ - vec_init = gimple_build (&init_stmts, VEC_SERIES_EXPR, - step_vectype, new_name, steps[0]); - if (!useless_type_conversion_p (vectype, step_vectype)) - vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, - vectype, vec_init); + /* The scalar inits of the IVs if not vectorized. */ + elt = inits[(ivn*const_nunits + eltn) % group_size]; + if (!useless_type_conversion_p (TREE_TYPE (vectype), + TREE_TYPE (elt))) + elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, + TREE_TYPE (vectype), elt); + init_elts.quick_push (elt); } - else - { - /* Build: - [base, base, base, ...] - + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. 
*/ - gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (steps[0]))); - gcc_assert (flag_associative_math); - gcc_assert (index_vectype != NULL_TREE); - - tree index = build_index_vector (index_vectype, 0, 1); - new_name = gimple_convert (&init_stmts, TREE_TYPE (steps[0]), - inits[0]); - tree base_vec = gimple_build_vector_from_val (&init_stmts, - step_vectype, - new_name); - tree step_vec = gimple_build_vector_from_val (&init_stmts, - step_vectype, - steps[0]); - vec_init = gimple_build (&init_stmts, FLOAT_EXPR, - step_vectype, index); - vec_init = gimple_build (&init_stmts, MULT_EXPR, - step_vectype, vec_init, step_vec); - vec_init = gimple_build (&init_stmts, PLUS_EXPR, - step_vectype, vec_init, base_vec); - if (!useless_type_conversion_p (vectype, step_vectype)) - vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, - vectype, vec_init); - } - /* iv_loop is nested in the loop to be vectorized. Generate: - vec_step = [S, S, S, S] */ - t = unshare_expr (steps[0]); - gcc_assert (CONSTANT_CLASS_P (t) - || TREE_CODE (t) == SSA_NAME); - vec_step = gimple_build_vector_from_val (&init_stmts, - step_vectype, t); - } - vec_steps.safe_push (vec_step); - if (peel_mul) - { - if (!step_mul) - step_mul = peel_mul; - else - step_mul = gimple_build (&init_stmts, - MINUS_EXPR, step_vectype, - step_mul, peel_mul); - } - - /* Create the induction-phi that defines the induction-operand. */ - vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, - "vec_iv_"); - induction_phi = create_phi_node (vec_dest, iv_loop->header); - induc_def = PHI_RESULT (induction_phi); - - /* Create the iv update inside the loop */ - tree up = vec_step; - if (lupdate_mul) - { - if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) - { - /* When we're using loop_len produced by SELEC_VL, the - non-final iterations are not always processing VF - elements. So vectorize induction variable instead of - - _21 = vect_vec_iv_.6_22 + { VF, ... 
}; - - We should generate: - - _35 = .SELECT_VL (ivtmp_33, VF); - vect_cst__22 = [vec_duplicate_expr] _35; - _21 = vect_vec_iv_.6_22 + vect_cst__22; */ - vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); - tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, - vectype, 0, 0); - if (SCALAR_FLOAT_TYPE_P (stept)) - expr = gimple_build (&stmts, FLOAT_EXPR, stept, len); - else - expr = gimple_convert (&stmts, stept, len); - lupdate_mul = gimple_build_vector_from_val (&stmts, - step_vectype, - expr); - up = gimple_build (&stmts, MULT_EXPR, - step_vectype, vec_step, lupdate_mul); - } - else - up = gimple_build (&init_stmts, - MULT_EXPR, step_vectype, - vec_step, lupdate_mul); - } - vec_def = gimple_convert (&stmts, step_vectype, induc_def); - vec_def = gimple_build (&stmts, - PLUS_EXPR, step_vectype, vec_def, up); - vec_def = gimple_convert (&stmts, vectype, vec_def); - insert_iv_increment (&incr_si, insert_after, stmts); - add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), - UNKNOWN_LOCATION); - - if (init_node) - vec_init = vect_get_slp_vect_def (init_node, ivn); - if (!nested_in_vect_loop - && step_mul - && !integer_zerop (step_mul)) - { - gcc_assert (invariant); - vec_def = gimple_convert (&init_stmts, step_vectype, vec_init); - up = gimple_build (&init_stmts, MULT_EXPR, step_vectype, - vec_step, step_mul); - vec_def = gimple_build (&init_stmts, PLUS_EXPR, step_vectype, - vec_def, up); - vec_init = gimple_convert (&init_stmts, vectype, vec_def); - } - - /* Set the arguments of the phi node: */ - add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); - - slp_node->push_vec_def (induction_phi); - } - if (!nested_in_vect_loop) - { - /* Fill up to the number of vectors we need for the whole group. */ - if (nunits.is_constant (&const_nunits)) - nivs = least_common_multiple (group_size, - const_nunits) / const_nunits; - else - nivs = 1; - vec_steps.reserve (nivs-ivn); - for (; ivn < nivs; ++ivn) - { - slp_node->push_vec_def (SLP_TREE_VEC_DEFS (slp_node)[0]); - vec_steps.quick_push (vec_steps[0]); + /* The number of steps to add to the initial values. */ + unsigned mul_elt = (ivn*const_nunits + eltn) / group_size; + mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept) + ? build_real_from_wide (stept, mul_elt, + UNSIGNED) + : build_int_cstu (stept, mul_elt)); } + vec_step = gimple_build_vector (&init_stmts, &step_elts); + step_mul = gimple_build_vector (&init_stmts, &mul_elts); + if (!init_node) + vec_init = gimple_build_vector (&init_stmts, &init_elts); } - - /* Re-use IVs when we can. We are generating further vector - stmts by adding VF' * stride to the IVs generated above. */ - if (ivn < nvects) + else { - if (nunits.is_constant (&const_nunits)) + if (init_node) + ; + else if (INTEGRAL_TYPE_P (TREE_TYPE (steps[0]))) { - unsigned vfp = (least_common_multiple (group_size, const_nunits) - / group_size); - lupdate_mul - = build_vector_from_val (step_vectype, - SCALAR_FLOAT_TYPE_P (stept) - ? build_real_from_wide (stept, - vfp, UNSIGNED) - : build_int_cstu (stept, vfp)); + new_name = gimple_convert (&init_stmts, stept, inits[0]); + /* Build the initial value directly as a VEC_SERIES_EXPR. 
*/ + vec_init = gimple_build (&init_stmts, VEC_SERIES_EXPR, + step_vectype, new_name, steps[0]); + if (!useless_type_conversion_p (vectype, step_vectype)) + vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, + vectype, vec_init); } else { - if (SCALAR_FLOAT_TYPE_P (stept)) - { - tree tem = build_int_cst (integer_type_node, nunits); - lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, - stept, tem); - } - else - lupdate_mul = build_int_cst (stept, nunits); - lupdate_mul = gimple_build_vector_from_val (&init_stmts, - step_vectype, - lupdate_mul); - } - for (; ivn < nvects; ++ivn) - { - gimple *iv - = SSA_NAME_DEF_STMT (SLP_TREE_VEC_DEFS (slp_node)[ivn - nivs]); - tree def = gimple_get_lhs (iv); - if (ivn < 2*nivs) - vec_steps[ivn - nivs] - = gimple_build (&init_stmts, MULT_EXPR, step_vectype, - vec_steps[ivn - nivs], lupdate_mul); - gimple_seq stmts = NULL; - def = gimple_convert (&stmts, step_vectype, def); - def = gimple_build (&stmts, PLUS_EXPR, step_vectype, - def, vec_steps[ivn % nivs]); - def = gimple_convert (&stmts, vectype, def); - if (gimple_code (iv) == GIMPLE_PHI) - gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); - else - { - gimple_stmt_iterator tgsi = gsi_for_stmt (iv); - gsi_insert_seq_after (&tgsi, stmts, GSI_CONTINUE_LINKING); - } - slp_node->push_vec_def (def); + /* Build: + [base, base, base, ...] + + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. */ + gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (steps[0]))); + gcc_assert (flag_associative_math); + gcc_assert (index_vectype != NULL_TREE); + + tree index = build_index_vector (index_vectype, 0, 1); + new_name = gimple_convert (&init_stmts, TREE_TYPE (steps[0]), + inits[0]); + tree base_vec = gimple_build_vector_from_val (&init_stmts, + step_vectype, + new_name); + tree step_vec = gimple_build_vector_from_val (&init_stmts, + step_vectype, + steps[0]); + vec_init = gimple_build (&init_stmts, FLOAT_EXPR, + step_vectype, index); + vec_init = gimple_build (&init_stmts, MULT_EXPR, + step_vectype, vec_init, step_vec); + vec_init = gimple_build (&init_stmts, PLUS_EXPR, + step_vectype, vec_init, base_vec); + if (!useless_type_conversion_p (vectype, step_vectype)) + vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, + vectype, vec_init); } + /* iv_loop is nested in the loop to be vectorized. Generate: + vec_step = [S, S, S, S] */ + t = unshare_expr (steps[0]); + gcc_assert (CONSTANT_CLASS_P (t) + || TREE_CODE (t) == SSA_NAME); + vec_step = gimple_build_vector_from_val (&init_stmts, + step_vectype, t); + } + vec_steps.safe_push (vec_step); + if (peel_mul) + { + if (!step_mul) + step_mul = peel_mul; + else + step_mul = gimple_build (&init_stmts, + MINUS_EXPR, step_vectype, + step_mul, peel_mul); } - new_bb = gsi_insert_seq_on_edge_immediate (pe, init_stmts); - gcc_assert (!new_bb); - - return true; - } + /* Create the induction-phi that defines the induction-operand. */ + vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, + "vec_iv_"); + induction_phi = create_phi_node (vec_dest, iv_loop->header); + induc_def = PHI_RESULT (induction_phi); - tree init_expr = vect_phi_initial_value (phi); - - gimple_seq stmts = NULL; - if (!nested_in_vect_loop) - { - /* Convert the initial value to the IV update type. */ - tree new_type = TREE_TYPE (step_expr); - init_expr = gimple_convert (&stmts, new_type, init_expr); - - /* If we are using the loop mask to "peel" for alignment then we need - to adjust the start value here. 
*/ - tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); - if (skip_niters != NULL_TREE) + /* Create the iv update inside the loop */ + tree up = vec_step; + if (lupdate_mul) { - if (FLOAT_TYPE_P (vectype)) - skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type, - skip_niters); - else - skip_niters = gimple_convert (&stmts, new_type, skip_niters); - tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type, - skip_niters, step_expr); - init_expr = gimple_build (&stmts, MINUS_EXPR, new_type, - init_expr, skip_step); - } - } + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) + { + /* When we're using loop_len produced by SELEC_VL, the + non-final iterations are not always processing VF + elements. So vectorize induction variable instead of - if (stmts) - { - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } + _21 = vect_vec_iv_.6_22 + { VF, ... }; - /* Create the vector that holds the initial_value of the induction. */ - if (nested_in_vect_loop) - { - /* iv_loop is nested in the loop to be vectorized. init_expr had already - been created during vectorization of previous stmts. We obtain it - from the STMT_VINFO_VEC_STMT of the defining stmt. */ - auto_vec<tree> vec_inits; - vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1, - init_expr, &vec_inits); - vec_init = vec_inits[0]; - /* If the initial value is not of proper type, convert it. */ - if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init))) - { - new_stmt - = gimple_build_assign (vect_get_new_ssa_name (vectype, - vect_simple_var, - "vec_iv_"), - VIEW_CONVERT_EXPR, - build1 (VIEW_CONVERT_EXPR, vectype, - vec_init)); - vec_init = gimple_assign_lhs (new_stmt); - new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop), - new_stmt); - gcc_assert (!new_bb); - } - } - else - { - /* iv_loop is the loop to be vectorized. Create: - vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */ - stmts = NULL; - new_name = gimple_convert (&stmts, TREE_TYPE (step_expr), init_expr); + We should generate: - unsigned HOST_WIDE_INT const_nunits; - if (nunits.is_constant (&const_nunits)) - { - tree_vector_builder elts (step_vectype, const_nunits, 1); - elts.quick_push (new_name); - for (i = 1; i < const_nunits; i++) - { - /* Create: new_name_i = new_name + step_expr */ - new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name), - new_name, step_expr); - elts.quick_push (new_name); + _35 = .SELECT_VL (ivtmp_33, VF); + vect_cst__22 = [vec_duplicate_expr] _35; + _21 = vect_vec_iv_.6_22 + vect_cst__22; */ + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); + tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, + vectype, 0, 0); + if (SCALAR_FLOAT_TYPE_P (stept)) + expr = gimple_build (&stmts, FLOAT_EXPR, stept, len); + else + expr = gimple_convert (&stmts, stept, len); + lupdate_mul = gimple_build_vector_from_val (&stmts, step_vectype, + expr); + up = gimple_build (&stmts, MULT_EXPR, + step_vectype, vec_step, lupdate_mul); } - /* Create a vector from [new_name_0, new_name_1, ..., - new_name_nunits-1] */ - vec_init = gimple_build_vector (&stmts, &elts); - } - else if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr))) - /* Build the initial value directly from a VEC_SERIES_EXPR. */ - vec_init = gimple_build (&stmts, VEC_SERIES_EXPR, step_vectype, - new_name, step_expr); - else - { - /* Build: - [base, base, base, ...] - + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. 
*/ - gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))); - gcc_assert (flag_associative_math); - gcc_assert (index_vectype != NULL_TREE); - - tree index = build_index_vector (index_vectype, 0, 1); - tree base_vec = gimple_build_vector_from_val (&stmts, step_vectype, - new_name); - tree step_vec = gimple_build_vector_from_val (&stmts, step_vectype, - step_expr); - vec_init = gimple_build (&stmts, FLOAT_EXPR, step_vectype, index); - vec_init = gimple_build (&stmts, MULT_EXPR, step_vectype, - vec_init, step_vec); - vec_init = gimple_build (&stmts, PLUS_EXPR, step_vectype, - vec_init, base_vec); - } - vec_init = gimple_convert (&stmts, vectype, vec_init); + else + up = gimple_build (&init_stmts, MULT_EXPR, step_vectype, + vec_step, lupdate_mul); + } + vec_def = gimple_convert (&stmts, step_vectype, induc_def); + vec_def = gimple_build (&stmts, PLUS_EXPR, step_vectype, vec_def, up); + vec_def = gimple_convert (&stmts, vectype, vec_def); + insert_iv_increment (&incr_si, insert_after, stmts); + add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), + UNKNOWN_LOCATION); - if (stmts) + if (init_node) + vec_init = vect_get_slp_vect_def (init_node, ivn); + if (!nested_in_vect_loop + && step_mul + && !integer_zerop (step_mul)) { - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); + gcc_assert (invariant); + vec_def = gimple_convert (&init_stmts, step_vectype, vec_init); + up = gimple_build (&init_stmts, MULT_EXPR, step_vectype, + vec_step, step_mul); + vec_def = gimple_build (&init_stmts, PLUS_EXPR, step_vectype, + vec_def, up); + vec_init = gimple_convert (&init_stmts, vectype, vec_def); } - } - - - /* Create the vector that holds the step of the induction. */ - gimple_stmt_iterator *step_iv_si = NULL; - if (nested_in_vect_loop) - /* iv_loop is nested in the loop to be vectorized. Generate: - vec_step = [S, S, S, S] */ - new_name = step_expr; - else if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) - { - /* When we're using loop_len produced by SELEC_VL, the non-final - iterations are not always processing VF elements. So vectorize - induction variable instead of - _21 = vect_vec_iv_.6_22 + { VF, ... }; + /* Set the arguments of the phi node: */ + add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); - We should generate: - - _35 = .SELECT_VL (ivtmp_33, VF); - vect_cst__22 = [vec_duplicate_expr] _35; - _21 = vect_vec_iv_.6_22 + vect_cst__22; */ - gcc_assert (!slp_node); - gimple_seq seq = NULL; - vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); - tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0); - expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr), - unshare_expr (len)), - &seq, true, NULL_TREE); - new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr), expr, - step_expr); - gsi_insert_seq_before (&si, seq, GSI_SAME_STMT); - step_iv_si = &si; + slp_node->push_vec_def (induction_phi); } - else + if (!nested_in_vect_loop) { - /* iv_loop is the loop to be vectorized. Generate: - vec_step = [VF*S, VF*S, VF*S, VF*S] */ - gimple_seq seq = NULL; - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) - { - expr = build_int_cst (integer_type_node, vf); - expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr); - } + /* Fill up to the number of vectors we need for the whole group. 
*/ + if (nunits.is_constant (&const_nunits)) + nivs = least_common_multiple (group_size, const_nunits) / const_nunits; else - expr = build_int_cst (TREE_TYPE (step_expr), vf); - new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr), - expr, step_expr); - if (seq) + nivs = 1; + vec_steps.reserve (nivs-ivn); + for (; ivn < nivs; ++ivn) { - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); - gcc_assert (!new_bb); + slp_node->push_vec_def (SLP_TREE_VEC_DEFS (slp_node)[0]); + vec_steps.quick_push (vec_steps[0]); } } - t = unshare_expr (new_name); - gcc_assert (CONSTANT_CLASS_P (new_name) - || TREE_CODE (new_name) == SSA_NAME); - new_vec = build_vector_from_val (step_vectype, t); - vec_step = vect_init_vector (loop_vinfo, stmt_info, - new_vec, step_vectype, step_iv_si); - - - /* Create the following def-use cycle: - loop prolog: - vec_init = ... - vec_step = ... - loop: - vec_iv = PHI <vec_init, vec_loop> - ... - STMT - ... - vec_loop = vec_iv + vec_step; */ - - /* Create the induction-phi that defines the induction-operand. */ - vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"); - induction_phi = create_phi_node (vec_dest, iv_loop->header); - induc_def = PHI_RESULT (induction_phi); - - /* Create the iv update inside the loop */ - stmts = NULL; - vec_def = gimple_convert (&stmts, step_vectype, induc_def); - vec_def = gimple_build (&stmts, PLUS_EXPR, step_vectype, vec_def, vec_step); - vec_def = gimple_convert (&stmts, vectype, vec_def); - gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); - new_stmt = SSA_NAME_DEF_STMT (vec_def); - - /* Set the arguments of the phi node: */ - add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); - add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), - UNKNOWN_LOCATION); - - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi); - *vec_stmt = induction_phi; - - /* In case that vectorization factor (VF) is bigger than the number - of elements that we can fit in a vectype (nunits), we have to generate - more than one vector stmt - i.e - we need to "unroll" the - vector stmt by a factor VF/nunits. For more details see documentation - in vectorizable_operation. */ - - if (ncopies > 1) + /* Re-use IVs when we can. We are generating further vector + stmts by adding VF' * stride to the IVs generated above. */ + if (ivn < nvects) { - gimple_seq seq = NULL; - /* FORNOW. This restriction should be relaxed. */ - gcc_assert (!nested_in_vect_loop); - /* We expect LOOP_VINFO_USING_SELECT_VL_P to be false if ncopies > 1. */ - gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); - - /* Create the vector that holds the step of the induction. */ - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) + if (nunits.is_constant (&const_nunits)) { - expr = build_int_cst (integer_type_node, nunits); - expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr); + unsigned vfp = (least_common_multiple (group_size, const_nunits) + / group_size); + lupdate_mul + = build_vector_from_val (step_vectype, + SCALAR_FLOAT_TYPE_P (stept) + ? 
build_real_from_wide (stept, + vfp, UNSIGNED) + : build_int_cstu (stept, vfp)); } else - expr = build_int_cst (TREE_TYPE (step_expr), nunits); - new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr), - expr, step_expr); - if (seq) { - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); - gcc_assert (!new_bb); - } - - t = unshare_expr (new_name); - gcc_assert (CONSTANT_CLASS_P (new_name) - || TREE_CODE (new_name) == SSA_NAME); - new_vec = build_vector_from_val (step_vectype, t); - vec_step = vect_init_vector (loop_vinfo, stmt_info, - new_vec, step_vectype, NULL); - - vec_def = induc_def; - for (i = 1; i < ncopies + 1; i++) - { - /* vec_i = vec_prev + vec_step */ - gimple_seq stmts = NULL; - vec_def = gimple_convert (&stmts, step_vectype, vec_def); - vec_def = gimple_build (&stmts, - PLUS_EXPR, step_vectype, vec_def, vec_step); - vec_def = gimple_convert (&stmts, vectype, vec_def); - - gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); - if (i < ncopies) + if (SCALAR_FLOAT_TYPE_P (stept)) { - new_stmt = SSA_NAME_DEF_STMT (vec_def); - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); + tree tem = build_int_cst (integer_type_node, nunits); + lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, tem); } else + lupdate_mul = build_int_cst (stept, nunits); + lupdate_mul = gimple_build_vector_from_val (&init_stmts, step_vectype, + lupdate_mul); + } + for (; ivn < nvects; ++ivn) + { + gimple *iv + = SSA_NAME_DEF_STMT (SLP_TREE_VEC_DEFS (slp_node)[ivn - nivs]); + tree def = gimple_get_lhs (iv); + if (ivn < 2*nivs) + vec_steps[ivn - nivs] + = gimple_build (&init_stmts, MULT_EXPR, step_vectype, + vec_steps[ivn - nivs], lupdate_mul); + gimple_seq stmts = NULL; + def = gimple_convert (&stmts, step_vectype, def); + def = gimple_build (&stmts, PLUS_EXPR, step_vectype, + def, vec_steps[ivn % nivs]); + def = gimple_convert (&stmts, vectype, def); + if (gimple_code (iv) == GIMPLE_PHI) + gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); + else { - /* vec_1 = vec_iv + (VF/n * S) - vec_2 = vec_1 + (VF/n * S) - ... - vec_n = vec_prev + (VF/n * S) = vec_iv + VF * S = vec_loop - - vec_n is used as vec_loop to save the large step register and - related operations. */ - add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), - UNKNOWN_LOCATION); + gimple_stmt_iterator tgsi = gsi_for_stmt (iv); + gsi_insert_seq_after (&tgsi, stmts, GSI_CONTINUE_LINKING); } + slp_node->push_vec_def (def); } } - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "transform induction: created def-use cycle: %G%G", - (gimple *) induction_phi, SSA_NAME_DEF_STMT (vec_def)); + new_bb = gsi_insert_seq_on_edge_immediate (pe, init_stmts); + gcc_assert (!new_bb); return true; } @@ -12203,7 +11603,7 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance) gimple *stmt = STMT_VINFO_STMT (related_vinfo); stmt_worklist.safe_push (stmt); /* Set BB such that the assert in - 'get_initial_def_for_reduction' is able to determine that + 'get_initial_defs_for_reduction' is able to determine that the BB of the related stmt is inside this loop. */ gimple_set_bb (stmt, gimple_bb (new_stmt)); @@ -12701,6 +12101,13 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) dump_printf_loc (MSG_NOTE, vect_location, "Disabling unrolling due to" " variable-length vectorization factor\n"); } + + /* When we have unrolled the loop due to a user requested value we should + leave it up to the RTL unroll heuristics to determine if it's still worth + while to unroll more. 
*/
+  if (LOOP_VINFO_USER_UNROLL (loop_vinfo))
+    loop->unroll = 0;
+
   /* Free SLP instances here because otherwise stmt reference counting
      won't work.  */
   slp_instance instance;
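
Editor's note (not part of the patch): the reworked, SLP-only vectorizable_induction hunk above lays out grouped induction variables so that lane l of the initial vectors holds init[l % group_size] + (l / group_size) * step[l % group_size], and each vector loop iteration then advances every lane by ((nvects * nunits) / group_size) * step[l % group_size]. The short standalone C++ sketch below reproduces that arithmetic with plain integers; all names and the example values (group_size = 3, nunits = 4, nvects = 3, init, step) are illustrative assumptions, not GCC internals, and the expected output matches the "[i0, i1, i2, i0 + S0] ..." example quoted in the hunk's context comment.

// Standalone sketch of the grouped SLP induction lane layout (assumed values).
#include <cstdio>
#include <vector>

int main ()
{
  const unsigned group_size = 3;                 // scalar IVs in the SLP group
  const unsigned nunits = 4;                     // lanes per vector
  const unsigned nvects = 3;                     // vector copies per iteration
  const std::vector<long> init = {10, 20, 30};   // i0, i1, i2
  const std::vector<long> step = {1, 2, 3};      // S0, S1, S2

  // Scalar iterations covered by one vector loop iteration, per IV
  // (the lup_mul factor in the patch).
  const unsigned lup_mul = (nvects * nunits) / group_size;

  for (unsigned v = 0; v < nvects; ++v)
    {
      printf ("vector %u initial lanes:", v);
      for (unsigned e = 0; e < nunits; ++e)
	{
	  unsigned lane = v * nunits + e;
	  unsigned iv = lane % group_size;   // which scalar IV this lane tracks
	  unsigned mul = lane / group_size;  // how many scalar steps ahead
	  printf (" %ld", init[iv] + (long) mul * step[iv]);
	}
      printf ("  (per-iteration increment: %u * step per lane)\n", lup_mul);
    }
  return 0;
}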
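
A second editor's sketch, again outside the patch, illustrates why the hunk multiplies the step by the dynamic length when LOOP_VINFO_USING_SELECT_VL_P holds: with .SELECT_VL an iteration may process fewer than VF elements, so the induction update must be len * step instead of the constant VF * step. The select_vl helper, the values, and the scalar view of lane 0 are assumptions for illustration only.

// Standalone sketch of the SELECT_VL-style induction update (assumed names).
#include <algorithm>
#include <cstdio>

static unsigned select_vl (unsigned remaining, unsigned vf)
{
  // Stand-in for .SELECT_VL: any value in [1, min(remaining, vf)] is legal;
  // here we simply clamp to the remaining element count.
  return std::min (remaining, vf);
}

int main ()
{
  const unsigned vf = 4;
  const long step = 3;
  long iv = 100;                  // scalar view of lane 0 of the vector IV
  for (unsigned remaining = 10; remaining > 0; )
    {
      unsigned len = select_vl (remaining, vf);
      printf ("process %u elements starting at iv = %ld\n", len, iv);
      iv += (long) len * step;    // advance by len * step, not vf * step
      remaining -= len;
    }
  return 0;
}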