diff options
author | Ira Rosen <irar@il.ibm.com> | 2007-09-09 08:46:12 +0000 |
---|---|---|
committer | Ira Rosen <irar@gcc.gnu.org> | 2007-09-09 08:46:12 +0000 |
commit | 805e2059392217cfabc6b719df5a7ee7d0323c7b (patch) | |
tree | 614dcb43b7c25205723a4314d1363a71f9cf26cb /gcc/tree-vect-analyze.c | |
parent | ae2bd7d2edf73ff8386e6185bfc033fa7e6e4633 (diff) | |
download | gcc-805e2059392217cfabc6b719df5a7ee7d0323c7b.zip gcc-805e2059392217cfabc6b719df5a7ee7d0323c7b.tar.gz gcc-805e2059392217cfabc6b719df5a7ee7d0323c7b.tar.bz2 |
tree-vectorizer.h (enum vect_def_type): Start enumeration from 1.
* tree-vectorizer.h (enum vect_def_type): Start enumeration from 1.
(struct _slp_tree, struct _slp_instance): Define new data structures
along macros for their access.
(struct _loop_vec_info): Define new fields: strided_stores,
slp_instances, and slp_unrolling_factor along macros for their access.
(enum slp_vect_type): New.
(struct _stmt_vec_info): Define new field, slp_type, and macros for its
access.
(STMT_VINFO_STRIDED_ACCESS): New macro.
(vect_free_slp_tree): Declare.
(vectorizable_load): Add an argument of type slp_tree.
(vectorizable_store, vectorizable_operation, vectorizable_conversion,
vectorizable_assignment): Likewise.
(vect_model_simple_cost, vect_model_store_cost, vect_model_load_cost):
Declare (make extern).
* tree-vectorizer.c (new_stmt_vec_info): Initialize the new field.
(new_loop_vec_info): Likewise.
(destroy_loop_vec_info): Free memory allocated for SLP structures.
* tree-vect-analyze.c: Include recog.h.
(vect_update_slp_costs_according_to_vf): New.
(vect_analyze_operations): Add argument for calls to vectorizable_ ()
functions. For not pure SLP stmts with strided access check that the
group size is power of 2. Update the vectorization factor according to
SLP. Call vect_update_slp_costs_according_to_vf.
(vect_analyze_group_access): New.
(vect_analyze_data_ref_access): Call vect_analyze_group_access.
(vect_free_slp_tree): New function.
(vect_get_and_check_slp_defs, vect_build_slp_tree, vect_print_slp_tree,
vect_mark_slp_stmts, vect_analyze_slp_instance, vect_analyze_slp,
vect_make_slp_decision, vect_detect_hybrid_slp_stmts,
vect_detect_hybrid_slp): Likewise.
(vect_analyze_loop): Call vect_analyze_slp, vect_make_slp_decision
and vect_detect_hybrid_slp.
* tree-vect-transform.c (vect_estimate_min_profitable_iters): Take
SLP costs into account.
(vect_get_cost_fields): New function.
(vect_model_simple_cost): Make extern, add SLP parameter and handle
SLP.
(vect_model_store_cost, vect_model_load_cost): Likewise.
(vect_get_constant_vectors): New function.
(vect_get_slp_vect_defs, vect_get_slp_defs,
vect_get_vec_defs_for_stmt_copy,
vect_get_vec_defs): Likewise.
(vectorizable_reduction): Don't handle SLP for now.
(vectorizable_call): Don't handle SLP for now. Add argument to
vect_model_simple_cost.
(vectorizable_conversion): Handle SLP (call vect_get_vec_defs to
get SLPed and vectorized defs). Fix indentation and spacing.
(vectorizable_assignment): Handle SLP.
(vectorizable_induction): Don't handle SLP for now.
(vectorizable_operation): Likewise.
(vectorizable_type_demotion): Add argument to
vect_model_simple_cost.
(vectorizable_type_promotion): Likewise.
(vectorizable_store, vectorizable_load): Handle SLP.
(vectorizable_condition): Don't handle SLP for now.
(vect_transform_stmt): Add a new argument for SLP. Check that there is
no SLP transformation required for unsupported cases. Add SLP
argument for supported cases.
(vect_remove_stores): New function.
(vect_schedule_slp_instance, vect_schedule_slp): Likewise.
(vect_transform_loop): Schedule SLP instances.
* Makefile.in: (tree-vect-analyze.o): Depend on recog.h.
From-SVN: r128289
Diffstat (limited to 'gcc/tree-vect-analyze.c')
-rw-r--r-- | gcc/tree-vect-analyze.c | 1060 |
1 files changed, 937 insertions, 123 deletions
diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c index a37fcf4..684d12d 100644 --- a/gcc/tree-vect-analyze.c +++ b/gcc/tree-vect-analyze.c @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-scalar-evolution.h" #include "tree-vectorizer.h" #include "toplev.h" +#include "recog.h" /* Main analysis functions. */ static loop_vec_info vect_analyze_loop_form (struct loop *); @@ -300,6 +301,30 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) } +/* SLP costs are calculated according to SLP instance unrolling factor (i.e., + the number of created vector stmts depends on the unrolling factor). However, + the actual number of vector stmts for every SLP node depends on VF which is + set later in vect_analyze_operations(). Hence, SLP costs should be updated. + In this function we assume that the inside costs calculated in + vect_model_xxx_cost are linear in ncopies. */ + +static void +vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo) +{ + unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); + slp_instance instance; + + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ==="); + + for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++) + /* We assume that costs are linear in ncopies. */ + SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf + / SLP_INSTANCE_UNROLLING_FACTOR (instance); +} + + /* Function vect_analyze_operations. Scan the loop stmts and make sure they are all vectorizable. 
*/ @@ -320,6 +345,7 @@ vect_analyze_operations (loop_vec_info loop_vinfo) int min_profitable_iters; int min_scalar_loop_bound; unsigned int th; + bool only_slp_in_loop = true; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_analyze_operations ==="); @@ -456,12 +482,12 @@ vect_analyze_operations (loop_vec_info loop_vinfo) ok = (vectorizable_type_promotion (stmt, NULL, NULL) || vectorizable_type_demotion (stmt, NULL, NULL) - || vectorizable_conversion (stmt, NULL, NULL) - || vectorizable_operation (stmt, NULL, NULL) - || vectorizable_assignment (stmt, NULL, NULL) - || vectorizable_load (stmt, NULL, NULL) + || vectorizable_conversion (stmt, NULL, NULL, NULL) + || vectorizable_operation (stmt, NULL, NULL, NULL) + || vectorizable_assignment (stmt, NULL, NULL, NULL) + || vectorizable_load (stmt, NULL, NULL, NULL) || vectorizable_call (stmt, NULL, NULL) - || vectorizable_store (stmt, NULL, NULL) + || vectorizable_store (stmt, NULL, NULL, NULL) || vectorizable_condition (stmt, NULL, NULL) || vectorizable_reduction (stmt, NULL, NULL)); @@ -480,6 +506,30 @@ vect_analyze_operations (loop_vec_info loop_vinfo) } return false; } + + if (!PURE_SLP_STMT (stmt_info)) + { + /* STMT needs loop-based vectorization. */ + only_slp_in_loop = false; + + /* Groups of strided accesses whose size is not a power of 2 are + not vectorizable yet using loop-vectorization. Therefore, if + this stmt feeds non-SLP-able stmts (i.e., this stmt has to be + both SLPed and loop-based vectorzed), the loop cannot be + vectorized. 
*/ + if (STMT_VINFO_STRIDED_ACCESS (stmt_info) + && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt ( + DR_GROUP_FIRST_DR (stmt_info)))) == -1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "not vectorized: the size of group " + "of strided accesses is not a power of 2"); + print_generic_expr (vect_dump, stmt, TDF_SLIM); + } + return false; + } + } } /* stmts in bb */ } /* bbs */ @@ -499,6 +549,18 @@ vect_analyze_operations (loop_vec_info loop_vinfo) return false; } + /* If all the stmts in the loop can be SLPed, we perform only SLP, and + vectorization factor of the loop is the unrolling factor required by the + SLP instances. If that unrolling factor is 1, we say, that we perform + pure SLP on loop - cross iteration parallelism is not exploited. */ + if (only_slp_in_loop) + vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo); + else + vectorization_factor = least_common_multiple (vectorization_factor, + LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)); + + LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, @@ -518,6 +580,10 @@ vect_analyze_operations (loop_vec_info loop_vinfo) /* Analyze cost. Decide if worth while to vectorize. */ + /* Once VF is set, SLP costs should be updated since the number of created + vector stmts depends on VF. */ + vect_update_slp_costs_according_to_vf (loop_vinfo); + min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo); LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters; if (min_profitable_iters < 0) @@ -1462,9 +1528,9 @@ vect_update_misalignment_for_peel (struct data_reference *dr, /* For interleaved data accesses the step in the loop must be multiplied by the size of the interleaving group. 
*/ - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info))); - if (DR_GROUP_FIRST_DR (peel_stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info)) dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info); /* It can be assumed that the data refs with the same alignment as dr_peel @@ -1516,7 +1582,7 @@ vect_verify_datarefs_alignment (loop_vec_info loop_vinfo) stmt_vec_info stmt_info = vinfo_for_stmt (stmt); /* For interleaving, only the alignment of the first access matters. */ - if (DR_GROUP_FIRST_DR (stmt_info) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && DR_GROUP_FIRST_DR (stmt_info) != stmt) continue; @@ -1554,7 +1620,7 @@ vector_alignment_reachable_p (struct data_reference *dr) stmt_vec_info stmt_info = vinfo_for_stmt (stmt); tree vectype = STMT_VINFO_VECTYPE (stmt_info); - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) { /* For interleaved access we peel only if number of iterations in the prolog loop ({VF - misalignment}), is a multiple of the @@ -1768,7 +1834,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) /* For interleaving, only the alignment of the first access matters. */ - if (DR_GROUP_FIRST_DR (stmt_info) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && DR_GROUP_FIRST_DR (stmt_info) != stmt) continue; @@ -1818,7 +1884,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) members of the group, therefore we divide the number of iterations by the group size. */ stmt_info = vinfo_for_stmt (DR_STMT (dr0)); - if (DR_GROUP_FIRST_DR (stmt_info)) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) npeel /= DR_GROUP_SIZE (stmt_info); if (vect_print_dump_info (REPORT_DETAILS)) @@ -1837,7 +1903,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) stmt_info = vinfo_for_stmt (stmt); /* For interleaving, only the alignment of the first access matters. 
*/ - if (DR_GROUP_FIRST_DR (stmt_info) + if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && DR_GROUP_FIRST_DR (stmt_info) != stmt) continue; @@ -1907,7 +1973,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) /* For interleaving, only the alignment of the first access matters. */ if (aligned_access_p (dr) - || (DR_GROUP_FIRST_DR (stmt_info) + || (STMT_VINFO_STRIDED_ACCESS (stmt_info) && DR_GROUP_FIRST_DR (stmt_info) != stmt)) continue; @@ -2019,13 +2085,13 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) } -/* Function vect_analyze_data_ref_access. - - Analyze the access pattern of the data-reference DR. For now, a data access - has to be consecutive to be considered vectorizable. */ +/* Analyze groups of strided accesses: check that DR belongs to a group of + strided accesses of legal size, step, etc. Detect gaps, single element + interleaving, and other special cases. Set strided access info. + Collect groups of strided stores for further use in SLP analysis. */ static bool -vect_analyze_data_ref_access (struct data_reference *dr) +vect_analyze_group_access (struct data_reference *dr) { tree step = DR_STEP (dr); tree scalar_type = TREE_TYPE (DR_REF (dr)); @@ -2033,50 +2099,14 @@ vect_analyze_data_ref_access (struct data_reference *dr) tree stmt = DR_STMT (dr); stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); HOST_WIDE_INT stride; + bool slp_impossible = false; - /* Don't allow invariant accesses. */ - if (dr_step == 0) - return false; - - if (nested_in_vect_loop_p (loop, stmt)) - { - /* For the rest of the analysis we use the outer-loop step. 
*/ - step = STMT_VINFO_DR_STEP (stmt_info); - dr_step = TREE_INT_CST_LOW (step); - - if (dr_step == 0) - { - if (vect_print_dump_info (REPORT_ALIGNMENT)) - fprintf (vect_dump, "zero step in outer loop."); - if (DR_IS_READ (dr)) - return true; - else - return false; - } - } - /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the interleaving group (including gaps). */ stride = dr_step / type_size; - /* Consecutive? */ - if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))) - { - /* Mark that it is not interleaving. */ - DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE; - return true; - } - - if (nested_in_vect_loop_p (loop, stmt)) - { - if (vect_print_dump_info (REPORT_ALIGNMENT)) - fprintf (vect_dump, "strided access in outer loop."); - return false; - } - /* Not consecutive access is possible only if it is a part of interleaving. */ if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt))) { @@ -2119,99 +2149,105 @@ vect_analyze_data_ref_access (struct data_reference *dr) HOST_WIDE_INT diff, count_in_bytes; while (next) - { - /* Skip same data-refs. In case that two or more stmts share data-ref - (supported only for loads), we vectorize only the first stmt, and - the rest get their vectorized loads from the first one. */ - if (!tree_int_cst_compare (DR_INIT (data_ref), - DR_INIT (STMT_VINFO_DATA_REF ( - vinfo_for_stmt (next))))) - { + { + /* Skip same data-refs. In case that two or more stmts share data-ref + (supported only for loads), we vectorize only the first stmt, and + the rest get their vectorized loads from the first one. 
*/ + if (!tree_int_cst_compare (DR_INIT (data_ref), + DR_INIT (STMT_VINFO_DATA_REF ( + vinfo_for_stmt (next))))) + { if (!DR_IS_READ (data_ref)) - { + { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "Two store stmts share the same dr."); - return false; + return false; } - /* Check that there is no load-store dependencies for this loads + /* Check that there is no load-store dependencies for this loads to prevent a case of load-store-load to the same location. */ if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next)) || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev))) { if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, + fprintf (vect_dump, "READ_WRITE dependence in interleaving."); return false; } - /* For load use the same data-ref load. */ - DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev; + /* For load use the same data-ref load. */ + DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev; - prev = next; - next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); - continue; - } - prev = next; + prev = next; + next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); + continue; + } + prev = next; - /* Check that all the accesses have the same STEP. */ - next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next))); - if (tree_int_cst_compare (step, next_step)) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "not consecutive access in interleaving"); - return false; - } + /* Check that all the accesses have the same STEP. */ + next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next))); + if (tree_int_cst_compare (step, next_step)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "not consecutive access in interleaving"); + return false; + } - data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next)); - /* Check that the distance between two accesses is equal to the type - size. Otherwise, we have gaps. 
*/ - diff = (TREE_INT_CST_LOW (DR_INIT (data_ref)) - - TREE_INT_CST_LOW (prev_init)) / type_size; - if (!DR_IS_READ (data_ref) && diff != 1) + data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next)); + /* Check that the distance between two accesses is equal to the type + size. Otherwise, we have gaps. */ + diff = (TREE_INT_CST_LOW (DR_INIT (data_ref)) + - TREE_INT_CST_LOW (prev_init)) / type_size; + if (diff != 1) { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "interleaved store with gaps"); - return false; + /* FORNOW: SLP of accesses with gaps is not supported. */ + slp_impossible = true; + if (!DR_IS_READ (data_ref)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "interleaved store with gaps"); + return false; + } } - /* Store the gap from the previous member of the group. If there is no + + /* Store the gap from the previous member of the group. If there is no gap in the access, DR_GROUP_GAP is always 1. */ - DR_GROUP_GAP (vinfo_for_stmt (next)) = diff; + DR_GROUP_GAP (vinfo_for_stmt (next)) = diff; - prev_init = DR_INIT (data_ref); - next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); - /* Count the number of data-refs in the chain. */ - count++; - } + prev_init = DR_INIT (data_ref); + next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); + /* Count the number of data-refs in the chain. */ + count++; + } - /* COUNT is the number of accesses found, we multiply it by the size of - the type to get COUNT_IN_BYTES. */ + /* COUNT is the number of accesses found, we multiply it by the size of + the type to get COUNT_IN_BYTES. */ count_in_bytes = type_size * count; /* Check that the size of the interleaving is not greater than STEP. 
*/ - if (dr_step < count_in_bytes) - { - if (vect_print_dump_info (REPORT_DETAILS)) - { - fprintf (vect_dump, "interleaving size is greater than step for "); - print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM); - } - return false; - } + if (dr_step < count_in_bytes) + { + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "interleaving size is greater than step for "); + print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM); + } + return false; + } - /* Check that the size of the interleaving is equal to STEP for stores, - i.e., that there are no gaps. */ - if (!DR_IS_READ (dr) && dr_step != count_in_bytes) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "interleaved store with gaps"); - return false; - } + /* Check that the size of the interleaving is equal to STEP for stores, + i.e., that there are no gaps. */ + if (!DR_IS_READ (dr) && dr_step != count_in_bytes) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "interleaved store with gaps"); + return false; + } /* Check that STEP is a multiple of type size. */ if ((dr_step % type_size) != 0) - { - if (vect_print_dump_info (REPORT_DETAILS)) + { + if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "step is not a multiple of type size: step "); print_generic_expr (vect_dump, step, TDF_SLIM); @@ -2219,22 +2255,98 @@ vect_analyze_data_ref_access (struct data_reference *dr) print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type), TDF_SLIM); } - return false; - } + return false; + } - /* FORNOW: we handle only interleaving that is a power of 2. */ + /* FORNOW: we handle only interleaving that is a power of 2. + We don't fail here if it may be still possible to vectorize the + group using SLP. If not, the size of the group will be checked in + vect_analyze_operations, and the vectorization will fail. 
*/ if (exact_log2 (stride) == -1) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "interleaving is not a power of 2"); - return false; + + if (slp_impossible) + return false; } DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride; + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Detected interleaving of size %d", (int)stride); + + /* SLP: create an SLP data structure for every interleaving group of + stores for further analysis in vect_analyse_slp. */ + if (!DR_IS_READ (dr) && !slp_impossible) + VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt); } + return true; } +/* Analyze the access pattern of the data-reference DR. + In case of non-consecutive accesse call vect_analyze_group_access() to + analyze groups of strided accesses. */ + +static bool +vect_analyze_data_ref_access (struct data_reference *dr) +{ + tree step = DR_STEP (dr); + tree scalar_type = TREE_TYPE (DR_REF (dr)); + tree stmt = DR_STMT (dr); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); + + if (!step) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "bad data-ref access"); + return false; + } + + /* Don't allow invariant accesses. */ + if (dr_step == 0) + return false; + + if (nested_in_vect_loop_p (loop, stmt)) + { + /* For the rest of the analysis we use the outer-loop step. */ + step = STMT_VINFO_DR_STEP (stmt_info); + dr_step = TREE_INT_CST_LOW (step); + + if (dr_step == 0) + { + if (vect_print_dump_info (REPORT_ALIGNMENT)) + fprintf (vect_dump, "zero step in outer loop."); + if (DR_IS_READ (dr)) + return true; + else + return false; + } + } + + /* Consecutive? */ + if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))) + { + /* Mark that it is not interleaving. 
*/ + DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE; + return true; + } + + if (nested_in_vect_loop_p (loop, stmt)) + { + if (vect_print_dump_info (REPORT_ALIGNMENT)) + fprintf (vect_dump, "strided access in outer loop."); + return false; + } + + /* Not consecutive access - check if it's a part of interleaving group. */ + return vect_analyze_group_access (dr); +} + + /* Function vect_analyze_data_ref_accesses. Analyze the access pattern of all the data references in the loop. @@ -2266,6 +2378,697 @@ vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo) } +/* Recursively free the memory allocated for the SLP tree rooted at NODE. */ + +void +vect_free_slp_tree (slp_tree node) +{ + if (!node) + return; + + if (SLP_TREE_LEFT (node)) + vect_free_slp_tree (SLP_TREE_LEFT (node)); + + if (SLP_TREE_RIGHT (node)) + vect_free_slp_tree (SLP_TREE_RIGHT (node)); + + VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node)); + + if (SLP_TREE_VEC_STMTS (node)) + VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node)); + + free (node); +} + + +/* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are + of a legal type and that they match the defs of the first stmt of the SLP + group (stored in FIRST_STMT_...). */ + +static bool +vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node, + tree rhs, VEC (tree, heap) **def_stmts0, + VEC (tree, heap) **def_stmts1, + enum vect_def_type *first_stmt_dt0, + enum vect_def_type *first_stmt_dt1, + tree *first_stmt_def0_type, + tree *first_stmt_def1_type, + tree *first_stmt_const_oprnd, + int ncopies_for_cost) +{ + tree oprnd; + enum operation_type op_type = TREE_OPERAND_LENGTH (rhs); + unsigned int i, number_of_oprnds = op_type; + tree def, def_stmt; + enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; + stmt_vec_info stmt_info = + vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0)); + + /* Store. 
*/ + if (!op_type) + number_of_oprnds = 1; + else + gcc_assert (op_type == unary_op || op_type == binary_op); + + for (i = 0; i < number_of_oprnds; i++) + { + if (op_type) + oprnd = TREE_OPERAND (rhs, i); + else + oprnd = rhs; + + if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i]) + || (!def_stmt && dt[i] != vect_constant_def)) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: can't find def for "); + print_generic_expr (vect_dump, oprnd, TDF_SLIM); + } + + return false; + } + + if (!*first_stmt_dt0) + { + /* op0 of the first stmt of the group - store its info. */ + *first_stmt_dt0 = dt[i]; + if (def) + *first_stmt_def0_type = TREE_TYPE (def); + else + *first_stmt_const_oprnd = oprnd; + + /* Analyze costs (for the first stmt of the group only). */ + if (op_type) + /* Not memory operation (we don't call this functions for loads). */ + vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node); + else + /* Store. */ + vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node); + } + + else + { + if (!*first_stmt_dt1 && i == 1) + { + /* op1 of the first stmt of the group - store its info. */ + *first_stmt_dt1 = dt[i]; + if (def) + *first_stmt_def1_type = TREE_TYPE (def); + else + { + /* We assume that the stmt contains only one constant + operand. We fail otherwise, to be on the safe side. */ + if (*first_stmt_const_oprnd) + { + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "Build SLP failed: two constant " + "oprnds in stmt"); + return false; + } + *first_stmt_const_oprnd = oprnd; + } + } + else + { + /* Not first stmt of the group, check that the def-stmt/s match + the def-stmt/s of the first stmt. 
*/ + if ((i == 0 + && (*first_stmt_dt0 != dt[i] + || (*first_stmt_def0_type && def + && *first_stmt_def0_type != TREE_TYPE (def)))) + || (i == 1 + && (*first_stmt_dt1 != dt[i] + || (*first_stmt_def1_type && def + && *first_stmt_def1_type != TREE_TYPE (def)))) + || (!def + && TREE_TYPE (*first_stmt_const_oprnd) + != TREE_TYPE (oprnd))) + { + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "Build SLP failed: different types "); + + return false; + } + } + } + + /* Check the types of the definitions. */ + switch (dt[i]) + { + case vect_constant_def: + case vect_invariant_def: + break; + + case vect_loop_def: + if (i == 0) + VEC_safe_push (tree, heap, *def_stmts0, def_stmt); + else + VEC_safe_push (tree, heap, *def_stmts1, def_stmt); + break; + + default: + /* FORNOW: Not supported. */ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: illegal type of def "); + print_generic_expr (vect_dump, def, TDF_SLIM); + } + + return false; + } + } + + return true; +} + + +/* Recursively build an SLP tree starting from NODE. + Fail (and return FALSE) if def-stmts are not isomorphic, require data + permutation or are of unsupported types of operation. Otherwise, return + TRUE. + SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example + in the case of multiple types for now. 
*/ + +static bool +vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node, + unsigned int group_size, bool *slp_impossible, + int *inside_cost, int *outside_cost, + int ncopies_for_cost) +{ + VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size); + VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size); + unsigned int i; + VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node); + tree stmt = VEC_index (tree, stmts, 0); + enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0; + enum tree_code first_stmt_code = 0; + tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE; + tree lhs, rhs, prev_stmt = NULL_TREE; + bool stop_recursion = false, need_same_oprnds = false; + tree vectype, scalar_type, first_op1 = NULL_TREE; + unsigned int vectorization_factor = 0, ncopies; + optab optab; + int icode; + enum machine_mode optab_op2_mode; + enum machine_mode vec_mode; + tree first_stmt_const_oprnd = NULL_TREE; + struct data_reference *first_dr; + + /* For every stmt in NODE find its def stmt/s. */ + for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP for "); + print_generic_expr (vect_dump, stmt, TDF_SLIM); + } + + if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT "); + print_generic_expr (vect_dump, stmt, TDF_SLIM); + } + + return false; + } + + scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0)); + vectype = get_vectype_for_scalar_type (scalar_type); + gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); + vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype); + if (ncopies > 1) + { + /* FORNOW. 
*/ + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "SLP failed - multiple types "); + + *slp_impossible = true; + return false; + } + + lhs = GIMPLE_STMT_OPERAND (stmt, 0); + rhs = GIMPLE_STMT_OPERAND (stmt, 1); + + /* Check the operation. */ + if (i == 0) + { + first_stmt_code = TREE_CODE (rhs); + + /* Shift arguments should be equal in all the packed stmts for a + vector shift with scalar shift operand. */ + if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR) + { + vec_mode = TYPE_MODE (vectype); + optab = optab_for_tree_code (TREE_CODE (rhs), vectype); + if (!optab) + { + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "Build SLP failed: no optab."); + return false; + } + icode = (int) optab->handlers[(int) vec_mode].insn_code; + optab_op2_mode = insn_data[icode].operand[2].mode; + if (!VECTOR_MODE_P (optab_op2_mode)) + { + need_same_oprnds = true; + first_op1 = TREE_OPERAND (rhs, 1); + } + } + } + else + { + if (first_stmt_code != TREE_CODE (rhs)) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, + "Build SLP failed: different operation in stmt "); + print_generic_expr (vect_dump, stmt, TDF_SLIM); + } + + return false; + } + + if (need_same_oprnds + && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0)) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, + "Build SLP failed: different shift arguments in "); + print_generic_expr (vect_dump, stmt, TDF_SLIM); + } + + return false; + } + } + + /* Strided store or load. */ + if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))) + { + if (REFERENCE_CLASS_P (lhs)) + { + /* Store. */ + if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, + &def_stmts0, &def_stmts1, + &first_stmt_dt0, + &first_stmt_dt1, + &first_stmt_def0_type, + &first_stmt_def1_type, + &first_stmt_const_oprnd, + ncopies_for_cost)) + return false; + } + else + { + /* Load. 
*/ + if (i == 0) + { + /* First stmt of the SLP group should be the first load of + the interleaving loop if data permutation is not + allowed. */ + if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt) + { + /* FORNOW: data permutations are not supported. */ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: strided " + " loads need permutation "); + print_generic_expr (vect_dump, stmt, TDF_SLIM); + } + + return false; + } + + first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); + if (vect_supportable_dr_alignment (first_dr) + == dr_unaligned_unsupported) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: unsupported " + " unaligned load "); + print_generic_expr (vect_dump, stmt, TDF_SLIM); + } + + return false; + } + + /* Analyze costs (for the first stmt in the group). */ + vect_model_load_cost (vinfo_for_stmt (stmt), + ncopies_for_cost, *node); + } + else + { + if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt) + { + /* FORNOW: data permutations are not supported. */ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: strided " + " loads need permutation "); + print_generic_expr (vect_dump, stmt, TDF_SLIM); + } + return false; + } + } + + prev_stmt = stmt; + + /* We stop the tree when we reach a group of loads. */ + stop_recursion = true; + continue; + } + } /* Strided access. */ + else + { + if (REFERENCE_CLASS_P (rhs)) + { + /* Not strided load. */ + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: not strided load "); + print_generic_expr (vect_dump, stmt, TDF_SLIM); + } + + /* FORNOW: Not strided loads are not supported. */ + return false; + } + + /* Not memory operation. 
*/ + if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs)) + { + if (vect_print_dump_info (REPORT_SLP)) + { + fprintf (vect_dump, "Build SLP failed: operation"); + fprintf (vect_dump, " unsupported "); + print_generic_expr (vect_dump, stmt, TDF_SLIM); + } + + return false; + } + + /* Find the def-stmts. */ + if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0, + &def_stmts1, &first_stmt_dt0, + &first_stmt_dt1, + &first_stmt_def0_type, + &first_stmt_def1_type, + &first_stmt_const_oprnd, + ncopies_for_cost)) + return false; + } + } + + /* Add the costs of the node to the overall instance costs. */ + *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node); + *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node); + + /* Strided loads were reached - stop the recursion. */ + if (stop_recursion) + return true; + + /* Create SLP_TREE nodes for the definition node/s. */ + if (first_stmt_dt0 == vect_loop_def) + { + slp_tree left_node = XNEW (struct _slp_tree); + SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0; + SLP_TREE_VEC_STMTS (left_node) = NULL; + SLP_TREE_LEFT (left_node) = NULL; + SLP_TREE_RIGHT (left_node) = NULL; + SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0; + SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0; + if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size, + slp_impossible, inside_cost, outside_cost, + ncopies_for_cost)) + return false; + + SLP_TREE_LEFT (*node) = left_node; + } + + if (first_stmt_dt1 == vect_loop_def) + { + slp_tree right_node = XNEW (struct _slp_tree); + SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1; + SLP_TREE_VEC_STMTS (right_node) = NULL; + SLP_TREE_LEFT (right_node) = NULL; + SLP_TREE_RIGHT (right_node) = NULL; + SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0; + SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0; + if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size, + slp_impossible, inside_cost, outside_cost, + ncopies_for_cost)) + return false; + + SLP_TREE_RIGHT (*node) = right_node; + } + + return true; 
+} + + +static void +vect_print_slp_tree (slp_tree node) +{ + int i; + tree stmt; + + if (!node) + return; + + fprintf (vect_dump, "node "); + for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++) + { + fprintf (vect_dump, "\n\tstmt %d ", i); + print_generic_expr (vect_dump, stmt, TDF_SLIM); + } + fprintf (vect_dump, "\n"); + + vect_print_slp_tree (SLP_TREE_LEFT (node)); + vect_print_slp_tree (SLP_TREE_RIGHT (node)); +} + + +/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID). + If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index + J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the + stmts in NODE are to be marked. */ + +static void +vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j) +{ + int i; + tree stmt; + + if (!node) + return; + + for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++) + if (j < 0 || i == j) + STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark; + + vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j); + vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j); +} + + +/* Analyze an SLP instance starting from a group of strided stores. Call + vect_build_slp_tree to build a tree of packed stmts if possible. + Return FALSE if it's impossible to SLP any stmt in the loop. */ + +static bool +vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt) +{ + slp_instance new_instance; + slp_tree node = XNEW (struct _slp_tree); + unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt)); + unsigned int unrolling_factor = 1, nunits; + tree vectype, scalar_type, next; + unsigned int vectorization_factor = 0, ncopies; + bool slp_impossible = false; + int inside_cost = 0, outside_cost = 0, ncopies_for_cost; + + /* FORNOW: multiple types are not supported. 
*/ + scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))); + vectype = get_vectype_for_scalar_type (scalar_type); + nunits = TYPE_VECTOR_SUBPARTS (vectype); + vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + ncopies = vectorization_factor / nunits; + if (ncopies > 1) + { + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "SLP failed - multiple types "); + + return false; + } + + /* Create a node (a root of the SLP tree) for the packed strided stores. */ + SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size); + next = stmt; + /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ + while (next) + { + VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next); + next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); + } + + SLP_TREE_VEC_STMTS (node) = NULL; + SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; + SLP_TREE_LEFT (node) = NULL; + SLP_TREE_RIGHT (node) = NULL; + SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0; + SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0; + + /* Calculate the unrolling factor. */ + unrolling_factor = least_common_multiple (nunits, group_size) / group_size; + + /* Calculate the number of vector stmts to create based on the unrolling + factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is + GROUP_SIZE / NUNITS otherwise). */ + ncopies_for_cost = unrolling_factor * group_size / nunits; + + /* Build the tree for the SLP instance. */ + if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible, + &inside_cost, &outside_cost, ncopies_for_cost)) + { + /* Create a new SLP instance. 
*/ + new_instance = XNEW (struct _slp_instance); + SLP_INSTANCE_TREE (new_instance) = node; + SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; + SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; + SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost; + SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost; + VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo), + new_instance); + if (vect_print_dump_info (REPORT_SLP)) + vect_print_slp_tree (node); + + return true; + } + + /* Failed to SLP. */ + /* Free the allocated memory. */ + vect_free_slp_tree (node); + + if (slp_impossible) + return false; + + /* SLP failed for this instance, but it is still possible to SLP other stmts + in the loop. */ + return true; +} + + +/* Check if there are stmts in the loop that can be vectorized using SLP. Build + SLP trees of packed scalar stmts if SLP is possible. */ + +static bool +vect_analyze_slp (loop_vec_info loop_vinfo) +{ + unsigned int i; + VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo); + tree store; + + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "=== vect_analyze_slp ==="); + + for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++) + if (!vect_analyze_slp_instance (loop_vinfo, store)) + { + /* SLP failed. No instance can be SLPed in the loop. */ + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) + fprintf (vect_dump, "SLP failed."); + + return false; + } + + return true; +} + + +/* For each possible SLP instance decide whether to SLP it and calculate overall + unrolling factor needed to SLP the loop. 
*/ + +static void +vect_make_slp_decision (loop_vec_info loop_vinfo) +{ + unsigned int i, unrolling_factor = 1; + VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); + slp_instance instance; + int decided_to_slp = 0; + + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "=== vect_make_slp_decision ==="); + + for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++) + { + /* FORNOW: SLP if you can. */ + if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance)) + unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance); + + /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we + call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and + loop-based vectorization. Such stmts will be marked as HYBRID. */ + vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1); + decided_to_slp++; + } + + LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor; + + if (decided_to_slp && vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d", + decided_to_slp, unrolling_factor); +} + + +/* Find stmts that must be both vectorized and SLPed (since they feed stmts that + can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. 
*/ + +static void +vect_detect_hybrid_slp_stmts (slp_tree node) +{ + int i; + tree stmt; + imm_use_iterator imm_iter; + tree use_stmt; + + if (!node) + return; + + for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++) + if (PURE_SLP_STMT (vinfo_for_stmt (stmt)) + && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME) + FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0)) + if (vinfo_for_stmt (use_stmt) + && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt))) + vect_mark_slp_stmts (node, hybrid, i); + + vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node)); + vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node)); +} + + +/* Find stmts that must be both vectorized and SLPed. */ + +static void +vect_detect_hybrid_slp (loop_vec_info loop_vinfo) +{ + unsigned int i; + VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); + slp_instance instance; + + if (vect_print_dump_info (REPORT_SLP)) + fprintf (vect_dump, "=== vect_detect_hybrid_slp ==="); + + for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++) + vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance)); +} + + /* Function vect_analyze_data_refs. Find all the data references in the loop. @@ -3424,6 +4227,17 @@ vect_analyze_loop (struct loop *loop) return NULL; } + /* Check the SLP opportunities in the loop, analyze and build SLP trees. */ + ok = vect_analyze_slp (loop_vinfo); + if (ok) + { + /* Decide which possible SLP instances to SLP. */ + vect_make_slp_decision (loop_vinfo); + + /* Find stmts that need to be both vectorized and SLPed. */ + vect_detect_hybrid_slp (loop_vinfo); + } + /* This pass will decide on using loop versioning and/or loop peeling in order to enhance the alignment of data references in the loop. */ |