diff options
author | Ira Rosen <irar@il.ibm.com> | 2007-09-09 08:46:12 +0000 |
---|---|---|
committer | Ira Rosen <irar@gcc.gnu.org> | 2007-09-09 08:46:12 +0000 |
commit | 805e2059392217cfabc6b719df5a7ee7d0323c7b (patch) | |
tree | 614dcb43b7c25205723a4314d1363a71f9cf26cb /gcc/tree-vect-transform.c | |
parent | ae2bd7d2edf73ff8386e6185bfc033fa7e6e4633 (diff) | |
download | gcc-805e2059392217cfabc6b719df5a7ee7d0323c7b.zip gcc-805e2059392217cfabc6b719df5a7ee7d0323c7b.tar.gz gcc-805e2059392217cfabc6b719df5a7ee7d0323c7b.tar.bz2 |
tree-vectorizer.h (enum vect_def_type): Start enumeration from 1.
* tree-vectorizer.h (enum vect_def_type): Start enumeration from 1.
(struct _slp_tree, struct _slp_instance): Define new data structures
along with macros for their access.
(struct _loop_vec_info): Define new fields: strided_stores,
slp_instances, and slp_unrolling_factor along with macros for their access.
(enum slp_vect_type): New.
(struct _stmt_vec_info): Define new field, slp_type, and macros for its
access.
(STMT_VINFO_STRIDED_ACCESS): New macro.
(vect_free_slp_tree): Declare.
(vectorizable_load): Add an argument of type slp_tree.
(vectorizable_store, vectorizable_operation, vectorizable_conversion,
vectorizable_assignment): Likewise.
(vect_model_simple_cost, vect_model_store_cost, vect_model_load_cost):
Declare (make extern).
* tree-vectorizer.c (new_stmt_vec_info): Initialize the new field.
(new_loop_vec_info): Likewise.
(destroy_loop_vec_info): Free memory allocated for SLP structures.
* tree-vect-analyze.c: Include recog.h.
(vect_update_slp_costs_according_to_vf): New.
(vect_analyze_operations): Add argument for calls to vectorizable_ ()
functions. For not pure SLP stmts with strided access check that the
group size is power of 2. Update the vectorization factor according to
SLP. Call vect_update_slp_costs_according_to_vf.
(vect_analyze_group_access): New.
(vect_analyze_data_ref_access): Call vect_analyze_group_access.
(vect_free_slp_tree): New functions.
(vect_get_and_check_slp_defs, vect_build_slp_tree, vect_print_slp_tree,
vect_mark_slp_stmts, vect_analyze_slp_instance, vect_analyze_slp,
vect_make_slp_decision, vect_detect_hybrid_slp_stmts,
vect_detect_hybrid_slp): Likewise.
(vect_analyze_loop): Call vect_analyze_slp, vect_make_slp_decision
and vect_detect_hybrid_slp.
* tree-vect-transform.c (vect_estimate_min_profitable_iters): Take
SLP costs into account.
(vect_get_cost_fields): New function.
(vect_model_simple_cost): Make extern, add SLP parameter and handle
SLP.
(vect_model_store_cost, vect_model_load_cost): Likewise.
(vect_get_constant_vectors): New function.
(vect_get_slp_vect_defs, vect_get_slp_defs,
vect_get_vec_defs_for_stmt_copy,
vect_get_vec_defs): Likewise.
(vectorizable_reduction): Don't handle SLP for now.
(vectorizable_call): Don't handle SLP for now. Add argument to
vect_model_simple_cost.
(vectorizable_conversion): Handle SLP (call vect_get_vec_defs to
get SLPed and vectorized defs). Fix indentation and spacing.
(vectorizable_assignment): Handle SLP.
(vectorizable_induction): Don't handle SLP for now.
(vectorizable_operation): Likewise.
(vectorizable_type_demotion): Add argument to
vect_model_simple_cost.
(vectorizable_type_promotion): Likewise.
(vectorizable_store, vectorizable_load): Handle SLP.
(vectorizable_condition): Don't handle SLP for now.
(vect_transform_stmt): Add a new argument for SLP. Check that there is
no SLP transformation required for unsupported cases. Add SLP
argument for supported cases.
(vect_remove_stores): New function.
(vect_schedule_slp_instance, vect_schedule_slp): Likewise.
(vect_transform_loop): Schedule SLP instances.
* Makefile.in: (tree-vect-analyze.o): Depend on recog.h.
From-SVN: r128289
Diffstat (limited to 'gcc/tree-vect-transform.c')
-rw-r--r-- | gcc/tree-vect-transform.c | 900 |
1 files changed, 732 insertions, 168 deletions
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index cddebde..9698b41 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -46,7 +46,7 @@ along with GCC; see the file COPYING3. If not see #include "real.h" /* Utility functions for the code transformation. */ -static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *); +static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *, slp_tree); static tree vect_create_destination_var (tree, tree); static tree vect_create_data_ref_ptr (tree, struct loop*, tree, tree *, tree *, bool, tree, bool *); @@ -125,6 +125,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) int nbbs = loop->num_nodes; int byte_misalign; int innerloop_iters = 0, factor; + VEC (slp_instance, heap) *slp_instances; + slp_instance instance; /* Cost model disabled. */ if (!flag_vect_cost_model) @@ -287,6 +289,14 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) targetm.vectorize.builtin_vectorization_cost (runtime_test)); } + /* Add SLP costs. */ + slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); + for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++) + { + vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance); + vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance); + } + /* Calculate number of iterations required to make the vector version profitable, relative to the loop bodies only. The following condition must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where @@ -452,30 +462,55 @@ vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies) } +/* Return addresses of the cost fields of SLP_NODE if it's not NULL, and of + the stmt otherwise. 
*/ + +static inline void +vect_get_cost_fields (stmt_vec_info stmt_info, slp_tree slp_node, + int **inside_cost_field, int **outside_cost_field) +{ + if (slp_node) + { + *inside_cost_field = &(SLP_TREE_INSIDE_OF_LOOP_COST (slp_node)); + *outside_cost_field = &(SLP_TREE_OUTSIDE_OF_LOOP_COST (slp_node)); + } + else + { + *inside_cost_field = &(STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)); + *outside_cost_field = &(STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info)); + } +} + + /* Function vect_model_simple_cost. Models cost for simple operations, i.e. those that only emit ncopies of a single op. Right now, this does not account for multiple insns that could be generated for the single vector op. We will handle that shortly. */ -static void -vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type *dt) +void +vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, + enum vect_def_type *dt, slp_tree slp_node) { int i; + int *inside_cost_field, *outside_cost_field; - STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST; + /* Take addresses of relevant fields to update in the function. */ + vect_get_cost_fields (stmt_info, slp_node, &inside_cost_field, + &outside_cost_field); + + *inside_cost_field = ncopies * TARG_VEC_STMT_COST; /* FORNOW: Assuming maximum 2 args per stmts. */ - for (i=0; i<2; i++) + for (i = 0; i < 2; i++) { if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def) - STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) += TARG_SCALAR_TO_VEC_COST; + *outside_cost_field += TARG_SCALAR_TO_VEC_COST; } if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, " - "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info), - STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info)); + "outside_cost = %d .", *inside_cost_field, *outside_cost_field); } @@ -502,14 +537,20 @@ vect_cost_strided_group_size (stmt_vec_info stmt_info) Models cost for stores. 
In the case of strided accesses, one access has the overhead of the strided access attributed to it. */ -static void -vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type dt) +void +vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, + enum vect_def_type dt, slp_tree slp_node) { int cost = 0; int group_size; + int *inside_cost_field, *outside_cost_field; + + /* Take addresses of relevant fields to update in the function. */ + vect_get_cost_fields (stmt_info, slp_node, &inside_cost_field, + &outside_cost_field); if (dt == vect_constant_def || dt == vect_invariant_def) - STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = TARG_SCALAR_TO_VEC_COST; + *outside_cost_field = TARG_SCALAR_TO_VEC_COST; /* Strided access? */ if (DR_GROUP_FIRST_DR (stmt_info)) @@ -535,12 +576,11 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type /* Costs of the stores. */ cost += ncopies * TARG_VEC_STORE_COST; - STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost; + *inside_cost_field = cost; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, " - "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info), - STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info)); + "outside_cost = %d .", *inside_cost_field, *outside_cost_field); } @@ -551,8 +591,8 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type accesses are supported for loads, we also account for the costs of the access scheme chosen. 
*/ -static void -vect_model_load_cost (stmt_vec_info stmt_info, int ncopies) +void +vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node) { int inner_cost = 0; @@ -560,10 +600,15 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies) int alignment_support_cheme; tree first_stmt; struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr; + int *inside_cost_field, *outside_cost_field; + + /* Take addresses of relevant fields to update in the function. */ + vect_get_cost_fields (stmt_info, slp_node, &inside_cost_field, + &outside_cost_field); /* Strided accesses? */ first_stmt = DR_GROUP_FIRST_DR (stmt_info); - if (first_stmt) + if (first_stmt && !slp_node) { group_size = vect_cost_strided_group_size (stmt_info); first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); @@ -641,14 +686,14 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies) access in the group. Inside the loop, there is a load op and a realignment op. */ - if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1) + if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node) { outer_cost = 2*TARG_VEC_STMT_COST; if (targetm.vectorize.builtin_mask_for_load) outer_cost += TARG_VEC_STMT_COST; } - STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost; + *outside_cost_field = outer_cost; inner_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST); @@ -659,12 +704,11 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies) gcc_unreachable (); } - STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = inner_cost; + *inside_cost_field = inner_cost; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, " - "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info), - STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info)); + "outside_cost = %d .", *inside_cost_field, *outside_cost_field); } @@ -1256,6 +1300,177 @@ vect_init_vector (tree stmt, tree vector_var, tree vector_type, } +/* For constant 
and loop invariant defs of SLP_NODE this function returns + (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts. + OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar + stmts. */ + +static void +vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds, + unsigned int op_num) +{ + VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node); + tree stmt = VEC_index (tree, stmts, 0); + stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); + int nunits = TYPE_VECTOR_SUBPARTS (vectype); + tree vec_cst; + tree t = NULL_TREE; + int j, number_of_places_left_in_vector; + tree vector_type; + tree op, vop, operation; + int group_size = VEC_length (tree, stmts); + unsigned int vec_num, i; + int number_of_copies = 1; + bool is_store = false; + unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors); + + if (STMT_VINFO_DATA_REF (stmt_vinfo)) + is_store = true; + + /* NUMBER_OF_COPIES is the number of times we need to use the same values in + created vectors. It is greater than 1 if unrolling is performed. + + For example, we have two scalar operands, s1 and s2 (e.g., group of + strided accesses of size two), while NUINTS is four (i.e., four scalars + of this type can be packed in a vector). The output vector will contain + two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES + will be 2). + + If GROUP_SIZE > NUNITS, the scalars will be split into several vectors + containing the operands. + + For example, NUINTS is four as before, and the group size is 8 + (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and + {s5, s6, s7, s8}. 
*/ + + number_of_copies = least_common_multiple (nunits, group_size) / group_size; + + number_of_places_left_in_vector = nunits; + for (j = 0; j < number_of_copies; j++) + { + for (i = group_size - 1; VEC_iterate (tree, stmts, i, stmt); i--) + { + operation = GIMPLE_STMT_OPERAND (stmt, 1); + if (is_store) + op = operation; + else + op = TREE_OPERAND (operation, op_num); + + /* Create 'vect_ = {op0,op1,...,opn}'. */ + t = tree_cons (NULL_TREE, op, t); + + number_of_places_left_in_vector--; + + if (number_of_places_left_in_vector == 0) + { + number_of_places_left_in_vector = nunits; + + vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); + vec_cst = build_constructor_from_list (vector_type, t); + VEC_quick_push (tree, voprnds, + vect_init_vector (stmt, vec_cst, vector_type, + NULL)); + t = NULL_TREE; + } + } + } + + /* Since the vectors are created in the reverse order, we should invert + them. */ + vec_num = VEC_length (tree, voprnds); + for (j = vec_num - 1; j >= 0; j--) + { + vop = VEC_index (tree, voprnds, j); + VEC_quick_push (tree, *vec_oprnds, vop); + } + + VEC_free (tree, heap, voprnds); + + /* In case that VF is greater than the unrolling factor needed for the SLP + group of stmts, NUMBER_OF_VECTORS to be created is greater than + NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have + to replicate the vectors. */ + while (number_of_vectors > VEC_length (tree, *vec_oprnds)) + { + for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++) + VEC_quick_push (tree, *vec_oprnds, vop); + } +} + + +/* Get vectorized defintions from SLP_NODE that contains corresponding + vectorized def-stmts. 
*/ + +static void +vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds) +{ + tree vec_oprnd; + tree vec_def_stmt; + unsigned int i; + + gcc_assert (SLP_TREE_VEC_STMTS (slp_node)); + + for (i = 0; + VEC_iterate (tree, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt); + i++) + { + gcc_assert (vec_def_stmt); + vec_oprnd = GIMPLE_STMT_OPERAND (vec_def_stmt, 0); + VEC_quick_push (tree, *vec_oprnds, vec_oprnd); + } +} + + +/* Get vectorized definitions for SLP_NODE. + If the scalar definitions are loop invariants or constants, collect them and + call vect_get_constant_vectors() to create vector stmts. + Otherwise, the def-stmts must be already vectorized and the vectorized stmts + must be stored in the LEFT/RIGHT node of SLP_NODE, and we call + vect_get_slp_vect_defs() to retrieve them. */ + +static void +vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0, + VEC (tree,heap) **vec_oprnds1) +{ + tree operation, first_stmt; + + /* Allocate memory for vectorized defs. */ + *vec_oprnds0 = VEC_alloc (tree, heap, + SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)); + + /* SLP_NODE corresponds either to a group of stores or to a group of + unary/binary operations. We don't call this function for loads. */ + if (SLP_TREE_LEFT (slp_node)) + /* The defs are already vectorized. */ + vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0); + else + /* Build vectors from scalar defs. */ + vect_get_constant_vectors (slp_node, vec_oprnds0, 0); + + first_stmt = VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0); + if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))) + /* Since we don't call this function with loads, this is a group of + stores. */ + return; + + operation = GIMPLE_STMT_OPERAND (first_stmt, 1); + if (TREE_OPERAND_LENGTH (operation) == unary_op) + return; + + *vec_oprnds1 = VEC_alloc (tree, heap, + SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)); + + if (SLP_TREE_RIGHT (slp_node)) + /* The defs are already vectorized. 
*/ + vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1); + else + /* Build vectors from scalar defs. */ + vect_get_constant_vectors (slp_node, vec_oprnds1, 1); +} + + /* Function get_initial_def_for_induction Input: @@ -1744,6 +1959,54 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd) } +/* Get vectorized definitions for the operands to create a copy of an original + stmt. See vect_get_vec_def_for_stmt_copy() for details. */ + +static void +vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt, + VEC(tree,heap) **vec_oprnds0, + VEC(tree,heap) **vec_oprnds1) +{ + tree vec_oprnd = VEC_pop (tree, *vec_oprnds0); + + vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd); + VEC_quick_push (tree, *vec_oprnds0, vec_oprnd); + + if (vec_oprnds1) + { + vec_oprnd = VEC_pop (tree, *vec_oprnds1); + vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd); + VEC_quick_push (tree, *vec_oprnds1, vec_oprnd); + } +} + + +/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */ + +static void +vect_get_vec_defs (tree op0, tree op1, tree stmt, VEC(tree,heap) **vec_oprnds0, + VEC(tree,heap) **vec_oprnds1, slp_tree slp_node) +{ + if (slp_node) + vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1); + else + { + tree vec_oprnd; + + *vec_oprnds0 = VEC_alloc (tree, heap, 1); + vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL); + VEC_quick_push (tree, *vec_oprnds0, vec_oprnd); + + if (op1) + { + *vec_oprnds1 = VEC_alloc (tree, heap, 1); + vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL); + VEC_quick_push (tree, *vec_oprnds1, vec_oprnd); + } + } +} + + /* Function vect_finish_stmt_generation. Insert a new stmt. */ @@ -2399,6 +2662,10 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) gcc_assert (ncopies >= 1); + /* FORNOW: SLP not supported. */ + if (STMT_SLP_TYPE (stmt_info)) + return false; + /* 1. Is vectorizable reduction? 
*/ /* Not supportable if the reduction variable is used in the loop. */ @@ -2707,6 +2974,10 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def) return false; + /* FORNOW: SLP not supported. */ + if (STMT_SLP_TYPE (stmt_info)) + return false; + /* FORNOW: not yet supported. */ if (STMT_VINFO_LIVE_P (stmt_info)) { @@ -2815,7 +3086,7 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_call ==="); - vect_model_simple_cost (stmt_info, ncopies, dt); + vect_model_simple_cost (stmt_info, ncopies, dt, NULL); return true; } @@ -3005,16 +3276,14 @@ vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl, } -/* Function vectorizable_conversion. - -Check if STMT performs a conversion operation, that can be vectorized. -If VEC_STMT is also passed, vectorize the STMT: create a vectorized -stmt to replace it, put it in VEC_STMT, and insert it at BSI. -Return FALSE if not a vectorizable STMT, TRUE otherwise. */ +/* Check if STMT performs a conversion operation, that can be vectorized. + If VEC_STMT is also passed, vectorize the STMT: create a vectorized + stmt to replace it, put it in VEC_STMT, and insert it at BSI. + Return FALSE if not a vectorizable STMT, TRUE otherwise. 
*/ bool -vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, - tree * vec_stmt) +vectorizable_conversion (tree stmt, block_stmt_iterator *bsi, + tree *vec_stmt, slp_tree slp_node) { tree vec_dest; tree scalar_dest; @@ -3028,8 +3297,8 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, tree decl1 = NULL_TREE, decl2 = NULL_TREE; tree new_temp; tree def, def_stmt; - enum vect_def_type dt0; - tree new_stmt; + enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; + tree new_stmt = NULL_TREE; stmt_vec_info prev_stmt_info; int nunits_in; int nunits_out; @@ -3039,6 +3308,9 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, tree rhs_type, lhs_type; tree builtin_decl; enum { NARROW, NONE, WIDEN } modifier; + int i; + VEC(tree,heap) *vec_oprnds0 = NULL; + tree vop0; /* Is STMT a vectorizable conversion? */ @@ -3067,7 +3339,7 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) return false; - /* Check types of lhs and rhs */ + /* Check types of lhs and rhs. */ op0 = TREE_OPERAND (operation, 0); rhs_type = TREE_TYPE (op0); vectype_in = get_vectype_for_scalar_type (rhs_type); @@ -3091,7 +3363,7 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, if (modifier == NONE) gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out); - /* Bail out if the types are both integral or non-integral */ + /* Bail out if the types are both integral or non-integral. */ if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type)) || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type))) return false; @@ -3101,6 +3373,11 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, else ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; + /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies + this, so we can safely override NCOPIES with 1 here. 
*/ + if (slp_node) + ncopies = 1; + /* Sanity check: make sure that at least one copy of the vectorized stmt needs to be generated. */ gcc_assert (ncopies >= 1); @@ -3114,7 +3391,7 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, } /* Check the operands of the operation. */ - if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0)) + if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0])) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); @@ -3138,7 +3415,12 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, } if (modifier != NONE) - STMT_VINFO_VECTYPE (stmt_info) = vectype_in; + { + STMT_VINFO_VECTYPE (stmt_info) = vectype_in; + /* FORNOW: SLP not supported. */ + if (STMT_SLP_TYPE (stmt_info)) + return false; + } if (!vec_stmt) /* transformation not required. */ { @@ -3153,6 +3435,9 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, /* Handle def. */ vec_dest = vect_create_destination_var (scalar_dest, vectype_out); + if (modifier == NONE && !slp_node) + vec_oprnds0 = VEC_alloc (tree, heap, 1); + prev_stmt_info = NULL; switch (modifier) { @@ -3163,24 +3448,30 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, ssa_op_iter iter; if (j == 0) - vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); + vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node); else - vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL); builtin_decl = targetm.vectorize.builtin_conversion (code, vectype_in); - new_stmt = build_call_expr (builtin_decl, 1, vec_oprnd0); + for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++) + { + new_stmt = build_call_expr (builtin_decl, 1, vop0); - /* Arguments are ready. create the new vector stmt. 
*/ - new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt); - new_temp = make_ssa_name (vec_dest, new_stmt); - GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp; - vect_finish_stmt_generation (stmt, new_stmt, bsi); - FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS) - { - if (TREE_CODE (sym) == SSA_NAME) - sym = SSA_NAME_VAR (sym); - mark_sym_for_renaming (sym); + /* Arguments are ready. create the new vector stmt. */ + new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt); + new_temp = make_ssa_name (vec_dest, new_stmt); + GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, new_stmt, bsi); + FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, + SSA_OP_ALL_VIRTUALS) + { + if (TREE_CODE (sym) == SSA_NAME) + sym = SSA_NAME_VAR (sym); + mark_sym_for_renaming (sym); + } + if (slp_node) + VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt); } if (j == 0) @@ -3201,7 +3492,7 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, if (j == 0) vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); else - vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); + vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); STMT_VINFO_VECTYPE (stmt_info) = vectype_in; @@ -3237,12 +3528,12 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, if (j == 0) { vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); - vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); + vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); } else { - vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1); - vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); + vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1); + vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); } /* Arguments are ready. Create the new vector stmt. 
*/ @@ -3262,6 +3553,7 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); } + return true; } @@ -3274,12 +3566,12 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, Return FALSE if not a vectorizable STMT, TRUE otherwise. */ bool -vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) +vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt, + slp_tree slp_node) { tree vec_dest; tree scalar_dest; tree op; - tree vec_oprnd; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); tree vectype = STMT_VINFO_VECTYPE (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); @@ -3288,6 +3580,9 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; int nunits = TYPE_VECTOR_SUBPARTS (vectype); int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; + int i; + VEC(tree,heap) *vec_oprnds = NULL; + tree vop; gcc_assert (ncopies >= 1); if (ncopies > 1) @@ -3328,7 +3623,7 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_assignment ==="); - vect_model_simple_cost (stmt_info, ncopies, dt); + vect_model_simple_cost (stmt_info, ncopies, dt, NULL); return true; } @@ -3340,15 +3635,22 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) vec_dest = vect_create_destination_var (scalar_dest, vectype); /* Handle use. */ - op = GIMPLE_STMT_OPERAND (stmt, 1); - vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL); + vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node); /* Arguments are ready. create the new vector stmt. 
*/ - *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_oprnd); - new_temp = make_ssa_name (vec_dest, *vec_stmt); - GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp; - vect_finish_stmt_generation (stmt, *vec_stmt, bsi); + for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++) + { + *vec_stmt = build_gimple_modify_stmt (vec_dest, vop); + new_temp = make_ssa_name (vec_dest, *vec_stmt); + GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, *vec_stmt, bsi); + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt; + + if (slp_node) + VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt); + } + VEC_free (tree, heap, vec_oprnds); return true; } @@ -3403,6 +3705,10 @@ vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED, if (!STMT_VINFO_RELEVANT_P (stmt_info)) return false; + /* FORNOW: SLP not supported. */ + if (STMT_SLP_TYPE (stmt_info)) + return false; + gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def); if (STMT_VINFO_LIVE_P (stmt_info)) @@ -3444,13 +3750,14 @@ vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED, Return FALSE if not a vectorizable STMT, TRUE otherwise. 
*/ bool -vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) +vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt, + slp_tree slp_node) { tree vec_dest; tree scalar_dest; tree operation; tree op0, op1 = NULL; - tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; + tree vec_oprnd1 = NULL_TREE; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); tree vectype = STMT_VINFO_VECTYPE (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); @@ -3464,14 +3771,20 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) enum machine_mode optab_op2_mode; tree def, def_stmt; enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; - tree new_stmt; + tree new_stmt = NULL_TREE; stmt_vec_info prev_stmt_info; int nunits_in = TYPE_VECTOR_SUBPARTS (vectype); int nunits_out; tree vectype_out; int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; - int j; - + int j, i; + VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; + tree vop0, vop1; + + /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies + this, so we can safely override NCOPIES with 1 here. */ + if (slp_node) + ncopies = 1; gcc_assert (ncopies >= 1); /* FORNOW. This restriction should be relaxed. */ if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) @@ -3601,7 +3914,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_operation ==="); - vect_model_simple_cost (stmt_info, ncopies, dt); + vect_model_simple_cost (stmt_info, ncopies, dt, NULL); return true; } @@ -3613,6 +3926,13 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) /* Handle def. 
*/ vec_dest = vect_create_destination_var (scalar_dest, vectype); + if (!slp_node) + { + vec_oprnds0 = VEC_alloc (tree, heap, 1); + if (op_type == binary_op) + vec_oprnds1 = VEC_alloc (tree, heap, 1); + } + /* In case the vectorization factor (VF) is bigger than the number of elements that we can fit in a vectype (nunits), we have to generate more than one vector stmt - i.e - we need to "unroll" the @@ -3672,45 +3992,53 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) /* Handle uses. */ if (j == 0) { - vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); - if (op_type == binary_op) + if (op_type == binary_op + && (code == LSHIFT_EXPR || code == RSHIFT_EXPR) + && !slp_node) { - if (code == LSHIFT_EXPR || code == RSHIFT_EXPR) - { - /* Vector shl and shr insn patterns can be defined with - scalar operand 2 (shift operand). In this case, use - constant or loop invariant op1 directly, without - extending it to vector mode first. */ - optab_op2_mode = insn_data[icode].operand[2].mode; - if (!VECTOR_MODE_P (optab_op2_mode)) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "operand 1 using scalar mode."); - vec_oprnd1 = op1; - } - } - if (!vec_oprnd1) - vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL); + /* Vector shl and shr insn patterns can be defined with scalar + operand 2 (shift operand). In this case, use constant or loop + invariant op1 directly, without extending it to vector mode + first. 
*/ + optab_op2_mode = insn_data[icode].operand[2].mode; + if (!VECTOR_MODE_P (optab_op2_mode)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "operand 1 using scalar mode."); + vec_oprnd1 = op1; + VEC_quick_push (tree, vec_oprnds1, vec_oprnd1); + } } + + if (op_type == binary_op && !vec_oprnd1) + vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, + slp_node); + else + vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, &vec_oprnds1, + slp_node); } else - { - vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); - if (op_type == binary_op) - vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); - } + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); - /* Arguments are ready. create the new vector stmt. */ + /* Arguments are ready. Create the new vector stmt. */ + for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++) + { + if (op_type == binary_op) + { + vop1 = VEC_index (tree, vec_oprnds1, i); + new_stmt = build_gimple_modify_stmt (vec_dest, + build2 (code, vectype, vop0, vop1)); + } + else + new_stmt = build_gimple_modify_stmt (vec_dest, + build1 (code, vectype, vop0)); - if (op_type == binary_op) - new_stmt = build_gimple_modify_stmt (vec_dest, - build2 (code, vectype, vec_oprnd0, vec_oprnd1)); - else - new_stmt = build_gimple_modify_stmt (vec_dest, - build1 (code, vectype, vec_oprnd0)); - new_temp = make_ssa_name (vec_dest, new_stmt); - GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp; - vect_finish_stmt_generation (stmt, new_stmt, bsi); + new_temp = make_ssa_name (vec_dest, new_stmt); + GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, new_stmt, bsi); + if (slp_node) + VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt); + } if (j == 0) STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; @@ -3719,6 +4047,10 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) prev_stmt_info = vinfo_for_stmt (new_stmt); } + 
VEC_free (tree, heap, vec_oprnds0); + if (vec_oprnds1) + VEC_free (tree, heap, vec_oprnds1); + return true; } @@ -3829,7 +4161,7 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_demotion ==="); - vect_model_simple_cost (stmt_info, ncopies, dt); + vect_model_simple_cost (stmt_info, ncopies, dt, NULL); return true; } @@ -4000,7 +4332,7 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi, STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_promotion ==="); - vect_model_simple_cost (stmt_info, 2*ncopies, dt); + vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL); return true; } @@ -4095,6 +4427,7 @@ vect_strided_store_supported (tree vectype) fprintf (vect_dump, "interleave op not supported by target."); return false; } + return true; } @@ -4242,7 +4575,8 @@ vect_permute_store_chain (VEC(tree,heap) *dr_chain, Return FALSE if not a vectorizable STMT, TRUE otherwise. */ bool -vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) +vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt, + slp_tree slp_node) { tree scalar_dest; tree data_ref; @@ -4268,6 +4602,15 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) unsigned int group_size, i; VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL; bool inv_p; + VEC(tree,heap) *vec_oprnds = NULL; + bool slp = (slp_node != NULL); + stmt_vec_info first_stmt_vinfo; + unsigned int vec_num; + + /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies + this, so we can safely override NCOPIES with 1 here. 
*/ + if (slp) + ncopies = 1; gcc_assert (ncopies >= 1); @@ -4300,7 +4643,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0); if (TREE_CODE (scalar_dest) != ARRAY_REF && TREE_CODE (scalar_dest) != INDIRECT_REF - && !DR_GROUP_FIRST_DR (stmt_info)) + && !STMT_VINFO_STRIDED_ACCESS (stmt_info)) return false; op = GIMPLE_STMT_OPERAND (stmt, 1); @@ -4320,17 +4663,19 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) if (!STMT_VINFO_DATA_REF (stmt_info)) return false; - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) { strided_store = true; - if (!vect_strided_store_supported (vectype)) + if (!vect_strided_store_supported (vectype) + && !PURE_SLP_STMT (stmt_info) && !slp) return false; } if (!vec_stmt) /* transformation not required. */ { STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; - vect_model_store_cost (stmt_info, ncopies, dt); + if (!PURE_SLP_STMT (stmt_info)) + vect_model_store_cost (stmt_info, ncopies, dt, NULL); return true; } @@ -4350,17 +4695,28 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) /* We vectorize all the stmts of the interleaving group when we reach the last stmt in the group. */ if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt)) - < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))) + < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)) + && !slp) { *vec_stmt = NULL_TREE; return true; } + + if (slp) + strided_store = false; + + /* VEC_NUM is the number of vect stmts to be created for this group. 
*/ + if (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) < group_size) + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + else + vec_num = group_size; } else { first_stmt = stmt; first_dr = dr; - group_size = 1; + group_size = vec_num = 1; + first_stmt_vinfo = stmt_info; } if (vect_print_dump_info (REPORT_DETAILS)) @@ -4420,26 +4776,39 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) if (j == 0) { - /* For interleaved stores we collect vectorized defs for all the - stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then used - as an input to vect_permute_store_chain(), and OPRNDS as an input - to vect_get_vec_def_for_stmt_copy() for the next copy. - If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and - OPRNDS are of size 1. */ - next_stmt = first_stmt; - for (i = 0; i < group_size; i++) - { - /* Since gaps are not supported for interleaved stores, GROUP_SIZE - is the exact number of stmts in the chain. Therefore, NEXT_STMT - can't be NULL_TREE. In case that there is no interleaving, - GROUP_SIZE is 1, and only one iteration of the loop will be - executed. */ - gcc_assert (next_stmt); - op = GIMPLE_STMT_OPERAND (next_stmt, 1); - vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, NULL); - VEC_quick_push(tree, dr_chain, vec_oprnd); - VEC_quick_push(tree, oprnds, vec_oprnd); - next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); + if (slp) + { + /* Get vectorized arguments for SLP_NODE. */ + vect_get_slp_defs (slp_node, &vec_oprnds, NULL); + + vec_oprnd = VEC_index (tree, vec_oprnds, 0); + } + else + { + /* For interleaved stores we collect vectorized defs for all the + stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then + used as an input to vect_permute_store_chain(), and OPRNDS as + an input to vect_get_vec_def_for_stmt_copy() for the next copy. + + If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and + OPRNDS are of size 1. 
*/ + next_stmt = first_stmt; + for (i = 0; i < group_size; i++) + { + /* Since gaps are not supported for interleaved stores, + GROUP_SIZE is the exact number of stmts in the chain. + Therefore, NEXT_STMT can't be NULL_TREE. In case that + there is no interleaving, GROUP_SIZE is 1, and only one + iteration of the loop will be executed. */ + gcc_assert (next_stmt); + op = GIMPLE_STMT_OPERAND (next_stmt, 1); + + vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, + NULL); + VEC_quick_push(tree, dr_chain, vec_oprnd); + VEC_quick_push(tree, oprnds, vec_oprnd); + next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); + } } dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE, &dummy, &ptr_incr, false, @@ -4448,6 +4817,9 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) } else { + /* FORNOW SLP doesn't work for multiple types. */ + gcc_assert (!slp); + /* For interleaved stores we created vectorized defs for all the defs stored in OPRNDS in the previous iteration (previous copy). DR_CHAIN is then used as an input to vect_permute_store_chain(), @@ -4476,12 +4848,19 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) } next_stmt = first_stmt; - for (i = 0; i < group_size; i++) + for (i = 0; i < vec_num; i++) { - /* For strided stores vectorized defs are interleaved in - vect_permute_store_chain(). */ - if (strided_store) - vec_oprnd = VEC_index(tree, result_chain, i); + if (i > 0) + /* Bump the vector pointer. */ + dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, + NULL_TREE); + + if (slp) + vec_oprnd = VEC_index (tree, vec_oprnds, i); + else if (strided_store) + /* For strided stores vectorized defs are interleaved in + vect_permute_store_chain(). */ + vec_oprnd = VEC_index (tree, result_chain, i); data_ref = build_fold_indirect_ref (dataref_ptr); /* Arguments are ready. Create the new vector stmt. 
*/ @@ -4498,9 +4877,6 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); if (!next_stmt) break; - /* Bump the vector pointer. */ - dataref_ptr = - bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE); } } @@ -5021,7 +5397,8 @@ vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size, Return FALSE if not a vectorizable STMT, TRUE otherwise. */ bool -vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) +vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt, + slp_tree slp_node) { tree scalar_dest; tree vec_dest = NULL; @@ -5056,6 +5433,13 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) bool inv_p; bool compute_in_loop = false; struct loop *at_loop; + int vec_num; + bool slp = (slp_node != NULL); + + /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies + this, so we can safely override NCOPIES with 1 here. */ + if (slp) + ncopies = 1; gcc_assert (ncopies >= 1); @@ -5092,7 +5476,7 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) op = GIMPLE_STMT_OPERAND (stmt, 1); if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF - && !DR_GROUP_FIRST_DR (stmt_info)) + && !STMT_VINFO_STRIDED_ACCESS (stmt_info)) return false; if (!STMT_VINFO_DATA_REF (stmt_info)) @@ -5111,21 +5495,22 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) } /* Check if the load is a part of an interleaving chain. */ - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) { strided_load = true; /* FORNOW */ gcc_assert (! nested_in_vect_loop); /* Check if interleaving is supported. */ - if (!vect_strided_load_supported (vectype)) + if (!vect_strided_load_supported (vectype) + && !PURE_SLP_STMT (stmt_info) && !slp) return false; } if (!vec_stmt) /* transformation not required. 
*/ { STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; - vect_model_load_cost (stmt_info, ncopies); + vect_model_load_cost (stmt_info, ncopies, NULL); return true; } @@ -5146,12 +5531,21 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)); group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)); dr_chain = VEC_alloc (tree, heap, group_size); + + /* VEC_NUM is the number of vect stmts to be created for this group. */ + if (slp) + { + strided_load = false; + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + } + else + vec_num = group_size; } else { first_stmt = stmt; first_dr = dr; - group_size = 1; + group_size = vec_num = 1; } alignment_support_scheme = vect_supportable_dr_alignment (first_dr); @@ -5296,8 +5690,12 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE); - for (i = 0; i < group_size; i++) + for (i = 0; i < vec_num; i++) { + if (i > 0) + dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, + NULL_TREE); + /* 2. Create the vector-load in the loop. */ switch (alignment_support_scheme) { @@ -5373,7 +5771,7 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) if (alignment_support_scheme == dr_explicit_realign_optimized) { - if (i == group_size - 1 && j == ncopies - 1) + if (i == vec_num - 1 && j == ncopies - 1) add_phi_arg (phi, lsq, loop_latch_edge (containing_loop)); msq = lsq; } @@ -5414,13 +5812,20 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) gcc_unreachable (); /* FORNOW. */ } - if (strided_load) - VEC_quick_push (tree, dr_chain, new_temp); - if (i < group_size - 1) - dataref_ptr = - bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE); + /* Collect vector loads and later create their permutation in + vect_transform_strided_load (). 
*/ + if (strided_load) + VEC_quick_push (tree, dr_chain, new_temp); + + /* Store vector loads in the corresponding SLP_NODE. */ + if (slp) + VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt); } + /* FORNOW: SLP with multiple types is unsupported. */ + if (slp) + return true; + if (strided_load) { if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi)) @@ -5586,6 +5991,10 @@ vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def) return false; + /* FORNOW: SLP not supported. */ + if (STMT_SLP_TYPE (stmt_info)) + return false; + /* FORNOW: not yet supported. */ if (STMT_VINFO_LIVE_P (stmt_info)) { @@ -5676,12 +6085,14 @@ vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) return true; } + /* Function vect_transform_stmt. Create a vectorized stmt to replace STMT, and insert it at BSI. */ -bool -vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store) +static bool +vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store, + slp_tree slp_node) { bool is_store = false; tree vec_stmt = NULL_TREE; @@ -5692,44 +6103,47 @@ vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store) switch (STMT_VINFO_TYPE (stmt_info)) { case type_demotion_vec_info_type: + gcc_assert (!slp_node); done = vectorizable_type_demotion (stmt, bsi, &vec_stmt); gcc_assert (done); break; case type_promotion_vec_info_type: + gcc_assert (!slp_node); done = vectorizable_type_promotion (stmt, bsi, &vec_stmt); gcc_assert (done); break; case type_conversion_vec_info_type: - done = vectorizable_conversion (stmt, bsi, &vec_stmt); + done = vectorizable_conversion (stmt, bsi, &vec_stmt, slp_node); gcc_assert (done); break; case induc_vec_info_type: + gcc_assert (!slp_node); done = vectorizable_induction (stmt, bsi, &vec_stmt); gcc_assert (done); break; case op_vec_info_type: - done = vectorizable_operation (stmt, bsi, 
&vec_stmt); + done = vectorizable_operation (stmt, bsi, &vec_stmt, slp_node); gcc_assert (done); break; case assignment_vec_info_type: - done = vectorizable_assignment (stmt, bsi, &vec_stmt); + done = vectorizable_assignment (stmt, bsi, &vec_stmt, slp_node); gcc_assert (done); break; case load_vec_info_type: - done = vectorizable_load (stmt, bsi, &vec_stmt); + done = vectorizable_load (stmt, bsi, &vec_stmt, slp_node); gcc_assert (done); break; case store_vec_info_type: - done = vectorizable_store (stmt, bsi, &vec_stmt); + done = vectorizable_store (stmt, bsi, &vec_stmt, slp_node); gcc_assert (done); - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) { /* In case of interleaving, the whole chain is vectorized when the last store in the chain is reached. Store stmts before the last @@ -5744,15 +6158,18 @@ vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store) break; case condition_vec_info_type: + gcc_assert (!slp_node); done = vectorizable_condition (stmt, bsi, &vec_stmt); gcc_assert (done); break; case call_vec_info_type: + gcc_assert (!slp_node); done = vectorizable_call (stmt, bsi, &vec_stmt); break; case reduc_vec_info_type: + gcc_assert (!slp_node); done = vectorizable_reduction (stmt, bsi, &vec_stmt); gcc_assert (done); break; @@ -6165,7 +6582,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr)))); int nelements = TYPE_VECTOR_SUBPARTS (vectype); - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) { /* For interleaved access element size must be multiplied by the size of the interleaved group. */ @@ -6593,6 +7010,115 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, } +/* Remove a group of stores (for SLP or interleaving), free their + stmt_vec_info. 
*/ + +static void +vect_remove_stores (tree first_stmt) +{ + stmt_ann_t ann; + tree next = first_stmt; + tree tmp; + stmt_vec_info next_stmt_info; + block_stmt_iterator next_si; + + while (next) + { + /* Free the attached stmt_vec_info and remove the stmt. */ + next_si = bsi_for_stmt (next); + bsi_remove (&next_si, true); + next_stmt_info = vinfo_for_stmt (next); + ann = stmt_ann (next); + tmp = DR_GROUP_NEXT_DR (next_stmt_info); + free (next_stmt_info); + set_stmt_info (ann, NULL); + next = tmp; + } +} + + +/* Vectorize SLP instance tree in postorder. */ + +static bool +vect_schedule_slp_instance (slp_tree node, unsigned int vec_stmts_size) +{ + tree stmt; + bool strided_store, is_store; + block_stmt_iterator si; + stmt_vec_info stmt_info; + + if (!node) + return false; + + vect_schedule_slp_instance (SLP_TREE_LEFT (node), vec_stmts_size); + vect_schedule_slp_instance (SLP_TREE_RIGHT (node), vec_stmts_size); + + stmt = VEC_index(tree, SLP_TREE_SCALAR_STMTS (node), 0); + stmt_info = vinfo_for_stmt (stmt); + SLP_TREE_VEC_STMTS (node) = VEC_alloc (tree, heap, vec_stmts_size); + SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size; + + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "------>vectorizing SLP node starting from: "); + print_generic_expr (vect_dump, stmt, TDF_SLIM); + } + + si = bsi_for_stmt (stmt); + is_store = vect_transform_stmt (stmt, &si, &strided_store, node); + if (is_store) + { + if (DR_GROUP_FIRST_DR (stmt_info)) + /* If IS_STORE is TRUE, the vectorization of the + interleaving chain was completed - free all the stores in + the chain. */ + vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info)); + else + /* FORNOW: SLP originates only from strided stores. */ + gcc_unreachable (); + + return true; + } + + /* FORNOW: SLP originates only from strided stores. 
*/ + return false; +} + + +static bool +vect_schedule_slp (loop_vec_info loop_vinfo, unsigned int nunits) +{ + VEC (slp_instance, heap) *slp_instances = + LOOP_VINFO_SLP_INSTANCES (loop_vinfo); + slp_instance instance; + unsigned int vec_stmts_size; + unsigned int group_size, i; + unsigned int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + bool is_store = false; + + for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++) + { + group_size = SLP_INSTANCE_GROUP_SIZE (instance); + /* For each SLP instance calculate number of vector stmts to be created + for the scalar stmts in each node of the SLP tree. Number of vector + elements in one vector iteration is the number of scalar elements in + one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector + size. */ + vec_stmts_size = vectorization_factor * group_size / nunits; + + /* Schedule the tree of INSTANCE. */ + is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance), + vec_stmts_size); + + if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS) + || vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) + fprintf (vect_dump, "vectorizing stmts using SLP."); + } + + return is_store; +} + + /* Function vect_transform_loop. The analysis phase has determined that the loop is vectorizable. 
@@ -6610,6 +7136,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) tree ratio = NULL; int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); bool strided_store; + bool slp_scheduled = false; + unsigned int nunits; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vec_transform_loop ==="); @@ -6744,6 +7272,7 @@ vect_transform_loop (loop_vec_info loop_vinfo) stmt_info = vinfo_for_stmt (phi); if (!stmt_info) continue; + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !STMT_VINFO_LIVE_P (stmt_info)) continue; @@ -6757,7 +7286,7 @@ vect_transform_loop (loop_vec_info loop_vinfo) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "transform phi."); - vect_transform_stmt (phi, NULL, NULL); + vect_transform_stmt (phi, NULL, NULL, NULL); } } @@ -6791,21 +7320,56 @@ vect_transform_loop (loop_vec_info loop_vinfo) } gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); - if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)) - != (unsigned HOST_WIDE_INT) vectorization_factor) - && vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "multiple-types."); + nunits = + (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); + if (!STMT_SLP_TYPE (stmt_info) + && nunits != (unsigned int) vectorization_factor + && vect_print_dump_info (REPORT_DETAILS)) + /* For SLP VF is set according to unrolling factor, and not to + vector size, hence for SLP this print is not valid. */ + fprintf (vect_dump, "multiple-types."); + + /* SLP. Schedule all the SLP instances when the first SLP stmt is + reached. */ + if (STMT_SLP_TYPE (stmt_info)) + { + if (!slp_scheduled) + { + slp_scheduled = true; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "=== scheduling SLP instances ==="); + is_store = vect_schedule_slp (loop_vinfo, nunits); + + /* IS_STORE is true if STMT is a store. Stores cannot be of + hybrid SLP type. They are removed in + vect_schedule_slp_instance and their vinfo is destroyed. 
*/ + if (is_store) + { + bsi_next (&si); + continue; + } + } + + /* Hybrid SLP stmts must be vectorized in addition to SLP. */ + if (PURE_SLP_STMT (stmt_info)) + { + bsi_next (&si); + continue; + } + } + /* -------- vectorize statement ------------ */ if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "transform statement."); strided_store = false; - is_store = vect_transform_stmt (stmt, &si, &strided_store); + is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL); if (is_store) { stmt_ann_t ann; - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) { /* Interleaving. If IS_STORE is TRUE, the vectorization of the interleaving chain was completed - free all the stores in |