/* Analysis Utilities for Loop Vectorization. Copyright (C) 2003,2004,2005 Free Software Foundation, Inc. Contributed by Dorit Naishlos This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GCC; see the file COPYING. If not, write to the Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "config.h" #include "system.h" #include "coretypes.h" #include "tm.h" #include "ggc.h" #include "tree.h" #include "basic-block.h" #include "diagnostic.h" #include "tree-flow.h" #include "tree-dump.h" #include "timevar.h" #include "cfgloop.h" #include "expr.h" #include "optabs.h" #include "tree-chrec.h" #include "tree-data-ref.h" #include "tree-scalar-evolution.h" #include "tree-vectorizer.h" /* Main analysis functions. */ static loop_vec_info vect_analyze_loop_form (struct loop *); static bool vect_analyze_data_refs (loop_vec_info); static bool vect_mark_stmts_to_be_vectorized (loop_vec_info); static void vect_analyze_scalar_cycles (loop_vec_info); static bool vect_analyze_data_ref_accesses (loop_vec_info); static bool vect_analyze_data_ref_dependences (loop_vec_info); static bool vect_analyze_data_refs_alignment (loop_vec_info); static bool vect_compute_data_refs_alignment (loop_vec_info); static void vect_enhance_data_refs_alignment (loop_vec_info); static bool vect_analyze_operations (loop_vec_info); static bool vect_determine_vectorization_factor (loop_vec_info); /* Utility functions for the analyses. */ static bool exist_non_indexing_operands_for_use_p (tree, tree); static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool); static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *); static tree vect_get_loop_niters (struct loop *, tree *); static bool vect_analyze_data_ref_dependence (struct data_dependence_relation *, loop_vec_info); static bool vect_compute_data_ref_alignment (struct data_reference *); static bool vect_analyze_data_ref_access (struct data_reference *); static bool vect_can_advance_ivs_p (loop_vec_info); /* Function vect_determine_vectorization_factor Determine the vectorization factor (VF). VF is the number of data elements that are operated upon in parallel in a single iteration of the vectorized loop. For example, when vectorizing a loop that operates on 4byte elements, on a target with vector size (VS) 16byte, the VF is set to 4, since 4 elements can fit in a single vector register. We currently support vectorization of loops in which all types operated upon are of the same size. Therefore this function currently sets VF according to the size of the types operated upon, and fails if there are multiple sizes in the loop. VF is also the factor by which the loop iterations are strip-mined, e.g.: original loop: for (i=0; inum_nodes; block_stmt_iterator si; unsigned int vectorization_factor = 0; int i; tree scalar_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); for (i = 0; i < nbbs; i++) { basic_block bb = bbs[i]; for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) { tree stmt = bsi_stmt (si); unsigned int nunits; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); tree vectype; if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "==> examining statement: "); print_generic_expr (vect_dump, stmt, TDF_SLIM); } gcc_assert (stmt_info); /* skip stmts which do not need to be vectorized. */ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !STMT_VINFO_LIVE_P (stmt_info)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "skip."); continue; } if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) { fprintf (vect_dump, "not vectorized: vector stmt in loop:"); print_generic_expr (vect_dump, stmt, TDF_SLIM); } return false; } if (STMT_VINFO_DATA_REF (stmt_info)) scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); else if (TREE_CODE (stmt) == MODIFY_EXPR) scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0)); else scalar_type = TREE_TYPE (stmt); if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "get vectype for scalar type: "); print_generic_expr (vect_dump, scalar_type, TDF_SLIM); } vectype = get_vectype_for_scalar_type (scalar_type); if (!vectype) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) { fprintf (vect_dump, "not vectorized: unsupported data-type "); print_generic_expr (vect_dump, scalar_type, TDF_SLIM); } return false; } if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "vectype: "); print_generic_expr (vect_dump, vectype, TDF_SLIM); } STMT_VINFO_VECTYPE (stmt_info) = vectype; nunits = TYPE_VECTOR_SUBPARTS (vectype); if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "nunits = %d", nunits); if (vectorization_factor) { /* FORNOW: don't allow mixed units. This restriction will be relaxed in the future. */ if (nunits != vectorization_factor) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) fprintf (vect_dump, "not vectorized: mixed data-types"); return false; } } else vectorization_factor = nunits; gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type)) * vectorization_factor == UNITS_PER_SIMD_WORD); } } /* TODO: Analyze cost. Decide if worth while to vectorize. */ if (vectorization_factor <= 1) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) fprintf (vect_dump, "not vectorized: unsupported data-type"); return false; } LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; return true; } /* Function vect_analyze_operations. Scan the loop stmts and make sure they are all vectorizable. */ static bool vect_analyze_operations (loop_vec_info loop_vinfo) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); int nbbs = loop->num_nodes; block_stmt_iterator si; unsigned int vectorization_factor = 0; int i; bool ok; tree phi; stmt_vec_info stmt_info; bool need_to_vectorize = false; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_analyze_operations ==="); gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); for (i = 0; i < nbbs; i++) { basic_block bb = bbs[i]; for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) { stmt_info = vinfo_for_stmt (phi); if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "examining phi: "); print_generic_expr (vect_dump, phi, TDF_SLIM); } gcc_assert (stmt_info); if (STMT_VINFO_LIVE_P (stmt_info)) { /* FORNOW: not yet supported. */ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) fprintf (vect_dump, "not vectorized: value used after loop."); return false; } if (STMT_VINFO_RELEVANT_P (stmt_info)) { /* Most likely a reduction-like computation that is used in the loop. */ if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) fprintf (vect_dump, "not vectorized: unsupported pattern."); return false; } } for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) { tree stmt = bsi_stmt (si); stmt_vec_info stmt_info = vinfo_for_stmt (stmt); if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "==> examining statement: "); print_generic_expr (vect_dump, stmt, TDF_SLIM); } gcc_assert (stmt_info); /* skip stmts which do not need to be vectorized. this is expected to include: - the COND_EXPR which is the loop exit condition - any LABEL_EXPRs in the loop - computations that are used only for array indexing or loop control */ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !STMT_VINFO_LIVE_P (stmt_info)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "irrelevant."); continue; } if (STMT_VINFO_RELEVANT_P (stmt_info)) { gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))); gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); ok = (vectorizable_operation (stmt, NULL, NULL) || vectorizable_assignment (stmt, NULL, NULL) || vectorizable_load (stmt, NULL, NULL) || vectorizable_store (stmt, NULL, NULL) || vectorizable_condition (stmt, NULL, NULL)); if (!ok) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) { fprintf (vect_dump, "not vectorized: relevant stmt not supported: "); print_generic_expr (vect_dump, stmt, TDF_SLIM); } return false; } need_to_vectorize = true; } if (STMT_VINFO_LIVE_P (stmt_info)) { ok = vectorizable_reduction (stmt, NULL, NULL); if (ok) need_to_vectorize = true; else ok = vectorizable_live_operation (stmt, NULL, NULL); if (!ok) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) { fprintf (vect_dump, "not vectorized: live stmt not supported: "); print_generic_expr (vect_dump, stmt, TDF_SLIM); } return false; } } } /* stmts in bb */ } /* bbs */ /* TODO: Analyze cost. Decide if worth while to vectorize. */ /* All operations in the loop are either irrelevant (deal with loop control, or dead), or only used outside the loop and can be moved out of the loop (e.g. invariants, inductions). The loop can be optimized away by scalar optimizations. We're better off not touching this loop. */ if (!need_to_vectorize) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "All the computation can be taken out of the loop."); if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) fprintf (vect_dump, "not vectorized: redundant loop. no profit to vectorize."); return false; } if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC, vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo)); if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) fprintf (vect_dump, "not vectorized: iteration count too small."); return false; } if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "epilog loop required."); if (!vect_can_advance_ivs_p (loop_vinfo)) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) fprintf (vect_dump, "not vectorized: can't create epilog loop 1."); return false; } if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit)) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) fprintf (vect_dump, "not vectorized: can't create epilog loop 2."); return false; } } return true; } /* Function exist_non_indexing_operands_for_use_p USE is one of the uses attached to STMT. Check if USE is used in STMT for anything other than indexing an array. */ static bool exist_non_indexing_operands_for_use_p (tree use, tree stmt) { tree operand; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); /* USE corresponds to some operand in STMT. If there is no data reference in STMT, then any operand that corresponds to USE is not indexing an array. */ if (!STMT_VINFO_DATA_REF (stmt_info)) return true; /* STMT has a data_ref. FORNOW this means that its of one of the following forms: -1- ARRAY_REF = var -2- var = ARRAY_REF (This should have been verified in analyze_data_refs). 'var' in the second case corresponds to a def, not a use, so USE cannot correspond to any operands that are not used for array indexing. Therefore, all we need to check is if STMT falls into the first case, and whether var corresponds to USE. */ if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME) return false; operand = TREE_OPERAND (stmt, 1); if (TREE_CODE (operand) != SSA_NAME) return false; if (operand == use) return true; return false; } /* Function vect_analyze_scalar_cycles. Examine the cross iteration def-use cycles of scalar variables, by analyzing the loop (scalar) PHIs; Classify each cycle as one of the following: invariant, induction, reduction, unknown. Some forms of scalar cycles are not yet supported. Example1: reduction: (unsupported yet) loop1: for (i=0; iheader; tree dummy; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_analyze_scalar_cycles ==="); for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) { tree access_fn = NULL; tree def = PHI_RESULT (phi); stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi); tree reduc_stmt; if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "Analyze phi: "); print_generic_expr (vect_dump, phi, TDF_SLIM); } /* Skip virtual phi's. The data dependences that are associated with virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ if (!is_gimple_reg (SSA_NAME_VAR (def))) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "virtual phi. skip."); continue; } STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type; /* Analyze the evolution function. */ access_fn = analyze_scalar_evolution (loop, def); if (!access_fn) continue; if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "Access function of PHI: "); print_generic_expr (vect_dump, access_fn, TDF_SLIM); } if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "Detected induction."); STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def; continue; } /* TODO: handle invariant phis */ reduc_stmt = vect_is_simple_reduction (loop, phi); if (reduc_stmt) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "Detected reduction."); STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def; STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = vect_reduction_def; } else if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "Unknown def-use cycle pattern."); } return; } /* Function vect_analyze_data_ref_dependence. Return TRUE if there (might) exist a dependence between a memory-reference DRA and a memory-reference DRB. */ static bool vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, loop_vec_info loop_vinfo) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); int dist = 0; unsigned int loop_depth = 0; struct loop *loop_nest = loop; struct data_reference *dra = DDR_A (ddr); struct data_reference *drb = DDR_B (ddr); stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra)); stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb)); if (DDR_ARE_DEPENDENT (ddr) == chrec_known) return false; if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) { fprintf (vect_dump, "not vectorized: can't determine dependence between "); print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); fprintf (vect_dump, " and "); print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); } return true; } if (!DDR_DIST_VECT (ddr)) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) { fprintf (vect_dump, "not vectorized: bad dist vector for "); print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); fprintf (vect_dump, " and "); print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); } return true; } /* Find loop depth. */ while (loop_nest && loop_nest->outer && loop_nest->outer->outer) { loop_nest = loop_nest->outer; loop_depth++; } dist = DDR_DIST_VECT (ddr)[loop_depth]; if (vect_print_dump_info (REPORT_DR_DETAILS)) fprintf (vect_dump, "dependence distance = %d.",dist); /* Same loop iteration. */ if (dist % vectorization_factor == 0) { /* Two references with distance zero have the same alignment. */ VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb); VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra); if (vect_print_dump_info (REPORT_ALIGNMENT)) fprintf (vect_dump, "accesses have the same alignment."); if (vect_print_dump_info (REPORT_DR_DETAILS)) { fprintf (vect_dump, "dependence distance modulo vf == 0 between "); print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); fprintf (vect_dump, " and "); print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); } return false; } if (abs (dist) >= vectorization_factor) { /* Dependence distance does not create dependence, as far as vectorization is concerned, in this case. */ if (vect_print_dump_info (REPORT_DR_DETAILS)) fprintf (vect_dump, "dependence distance >= VF."); return false; } if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) { fprintf (vect_dump, "not vectorized: possible dependence between data-refs "); print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); fprintf (vect_dump, " and "); print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); } return true; } /* Function vect_analyze_data_ref_dependences. Examine all the data references in the loop, and make sure there do not exist any data dependences between them. */ static bool vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo) { unsigned int i; varray_type ddrs = LOOP_VINFO_DDRS (loop_vinfo); if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_analyze_dependences ==="); for (i = 0; i < VARRAY_ACTIVE_SIZE (ddrs); i++) { struct data_dependence_relation *ddr = VARRAY_GENERIC_PTR (ddrs, i); if (vect_analyze_data_ref_dependence (ddr, loop_vinfo)) return false; } return true; } /* Function vect_compute_data_ref_alignment Compute the misalignment of the data reference DR. Output: 1. If during the misalignment computation it is found that the data reference cannot be vectorized then false is returned. 2. DR_MISALIGNMENT (DR) is defined. FOR NOW: No analysis is actually performed. Misalignment is calculated only for trivial cases. TODO. */ static bool vect_compute_data_ref_alignment (struct data_reference *dr) { tree stmt = DR_STMT (dr); stmt_vec_info stmt_info = vinfo_for_stmt (stmt); tree ref = DR_REF (dr); tree vectype; tree base, base_addr; bool base_aligned; tree misalign; tree aligned_to, alignment; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vect_compute_data_ref_alignment:"); /* Initialize misalignment to unknown. */ DR_MISALIGNMENT (dr) = -1; misalign = DR_OFFSET_MISALIGNMENT (dr); aligned_to = DR_ALIGNED_TO (dr); base_addr = DR_BASE_ADDRESS (dr); base = build_fold_indirect_ref (base_addr); vectype = STMT_VINFO_VECTYPE (stmt_info); alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT); if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0) || !misalign) { if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "Unknown alignment for access: "); print_generic_expr (vect_dump, base, TDF_SLIM); } return true; } if ((DECL_P (base) && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)), alignment) >= 0) || (TREE_CODE (base_addr) == SSA_NAME && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE ( TREE_TYPE (base_addr)))), alignment) >= 0)) base_aligned = true; else base_aligned = false; if (!base_aligned) { if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))) { if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "can't force alignment of ref: "); print_generic_expr (vect_dump, ref, TDF_SLIM); } return true; } /* Force the alignment of the decl. NOTE: This is the only change to the code we make during the analysis phase, before deciding to vectorize the loop. */ if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "force alignment"); DECL_ALIGN (base) = TYPE_ALIGN (vectype); DECL_USER_ALIGN (base) = 1; } /* At this point we assume that the base is aligned. */ gcc_assert (base_aligned || (TREE_CODE (base) == VAR_DECL && DECL_ALIGN (base) >= TYPE_ALIGN (vectype))); /* Modulo alignment. */ misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment); if (tree_int_cst_sgn (misalign) < 0) { /* Negative misalignment value. */ if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "unexpected misalign value"); return false; } DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1); if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr)); print_generic_expr (vect_dump, ref, TDF_SLIM); } return true; } /* Function vect_compute_data_refs_alignment Compute the misalignment of data references in the loop. This pass may take place at function granularity instead of at loop granularity. FOR NOW: No analysis is actually performed. Misalignment is calculated only for trivial cases. TODO. */ static bool vect_compute_data_refs_alignment (loop_vec_info loop_vinfo) { varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); unsigned int i; for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++) { struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i); if (!vect_compute_data_ref_alignment (dr)) return false; } return true; } /* Function vect_enhance_data_refs_alignment This pass will use loop versioning and loop peeling in order to enhance the alignment of data references in the loop. FOR NOW: we assume that whatever versioning/peeling takes place, only the original loop is to be vectorized; Any other loops that are created by the transformations performed in this pass - are not supposed to be vectorized. This restriction will be relaxed. */ static void vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) { varray_type loop_datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); varray_type datarefs; VEC(dr_p,heap) *same_align_drs; struct data_reference *dr0 = NULL; struct data_reference *dr; unsigned int i, j; bool check_loads; /* This pass will require a cost model to guide it whether to apply peeling or versioning or a combination of the two. For example, the scheme that intel uses when given a loop with several memory accesses, is as follows: choose one memory access ('p') which alignment you want to force by doing peeling. Then, either (1) generate a loop in which 'p' is aligned and all other accesses are not necessarily aligned, or (2) use loop versioning to generate one loop in which all accesses are aligned, and another loop in which only 'p' is necessarily aligned. ("Automatic Intra-Register Vectorization for the Intel Architecture", Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International Journal of Parallel Programming, Vol. 30, No. 2, April 2002.) Devising a cost model is the most critical aspect of this work. It will guide us on which access to peel for, whether to use loop versioning, how many versions to create, etc. The cost model will probably consist of generic considerations as well as target specific considerations (on powerpc for example, misaligned stores are more painful than misaligned loads). Here is the general steps involved in alignment enhancements: -- original loop, before alignment analysis: for (i=0; isingle_exit->dest); *live_p = true; } } } return (*live_p || *relevant_p); } /* Function vect_mark_stmts_to_be_vectorized. Not all stmts in the loop need to be vectorized. For example: for i... for j... 1. T0 = i + j 2. T1 = a[T0] 3. j = j + 1 Stmt 1 and 3 do not need to be vectorized, because loop control and addressing of vectorized data-refs are handled differently. This pass detects such stmts. */ static bool vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) { VEC(tree,heap) *worklist; struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); unsigned int nbbs = loop->num_nodes; block_stmt_iterator si; tree stmt, use; stmt_ann_t ann; ssa_op_iter iter; unsigned int i; stmt_vec_info stmt_vinfo; basic_block bb; tree phi; bool relevant_p, live_p; tree def, def_stmt; enum vect_def_type dt; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ==="); worklist = VEC_alloc (tree, heap, 64); /* 1. Init worklist. */ bb = loop->header; for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) { if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "init: phi relevant? "); print_generic_expr (vect_dump, phi, TDF_SLIM); } if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p)) vect_mark_relevant (&worklist, phi, relevant_p, live_p); } for (i = 0; i < nbbs; i++) { bb = bbs[i]; for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) { stmt = bsi_stmt (si); if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "init: stmt relevant? "); print_generic_expr (vect_dump, stmt, TDF_SLIM); } if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p)) vect_mark_relevant (&worklist, stmt, relevant_p, live_p); } } /* 2. Process_worklist */ while (VEC_length (tree, worklist) > 0) { stmt = VEC_pop (tree, worklist); if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "worklist: examine stmt: "); print_generic_expr (vect_dump, stmt, TDF_SLIM); } /* Examine the USEs of STMT. For each ssa-name USE thta is defined in the loop, mark the stmt that defines it (DEF_STMT) as relevant/irrelevant and live/dead according to the liveness and relevance properties of STMT. */ gcc_assert (TREE_CODE (stmt) != PHI_NODE); ann = stmt_ann (stmt); stmt_vinfo = vinfo_for_stmt (stmt); relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo); live_p = STMT_VINFO_LIVE_P (stmt_vinfo); /* Generally, the liveness and relevance properties of STMT are propagated to the DEF_STMTs of its USEs: STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p Exceptions: (case 1) If USE is used only for address computations (e.g. array indexing), which does not need to be directly vectorized, then the liveness/relevance of the respective DEF_STMT is left unchanged. (case 2) If STMT has been identified as defining a reduction variable, then we have two cases: (case 2.1) The last use of STMT is the reduction-variable, which is defined by a loop-header-phi. We don't want to mark the phi as live or relevant (because it does not need to be vectorized, it is handled as part of the vectorization of the reduction), so in this case we skip the call to vect_mark_relevant. (case 2.2) The rest of the uses of STMT are defined in the loop body. For the def_stmt of these uses we want to set liveness/relevance as follows: STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true because even though STMT is classified as live (since it defines a value that is used across loop iterations) and irrelevant (since it is not used inside the loop), it will be vectorized, and therefore the corresponding DEF_STMTs need to marked as relevant. */ /* case 2.2: */ if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) { gcc_assert (!relevant_p && live_p); relevant_p = true; live_p = false; } FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) { /* case 1: we are only interested in uses that need to be vectorized. Uses that are used for address computation are not considered relevant. */ if (!exist_non_indexing_operands_for_use_p (use, stmt)) continue; if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt)) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) fprintf (vect_dump, "not vectorized: unsupported use in stmt."); VEC_free (tree, heap, worklist); return false; } if (!def_stmt || IS_EMPTY_STMT (def_stmt)) continue; if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "worklist: examine use %d: ", i); print_generic_expr (vect_dump, use, TDF_SLIM); } bb = bb_for_stmt (def_stmt); if (!flow_bb_inside_loop_p (loop, bb)) continue; /* case 2.1: the reduction-use does not mark the defining-phi as relevant. */ if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def && TREE_CODE (def_stmt) == PHI_NODE) continue; vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p); } } /* while worklist */ VEC_free (tree, heap, worklist); return true; } /* Function vect_can_advance_ivs_p In case the number of iterations that LOOP iterates in unknown at compile time, an epilog loop will be generated, and the loop induction variables (IVs) will be "advanced" to the value they are supposed to take just before the epilog loop. Here we check that the access function of the loop IVs and the expression that represents the loop bound are simple enough. These restrictions will be relaxed in the future. */ static bool vect_can_advance_ivs_p (loop_vec_info loop_vinfo) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block bb = loop->header; tree phi; /* Analyze phi functions of the loop header. */ if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_can_advance_ivs_p ==="); for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) { tree access_fn = NULL; tree evolution_part; if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "Analyze phi: "); print_generic_expr (vect_dump, phi, TDF_SLIM); } /* Skip virtual phi's. The data dependences that are associated with virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "virtual phi. skip."); continue; } /* Skip reduction phis. */ if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "reduc phi. skip."); continue; } /* Analyze the evolution function. */ access_fn = instantiate_parameters (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); if (!access_fn) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "No Access function."); return false; } if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "Access function of PHI: "); print_generic_expr (vect_dump, access_fn, TDF_SLIM); } evolution_part = evolution_part_in_loop_num (access_fn, loop->num); if (evolution_part == NULL_TREE) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "No evolution."); return false; } /* FORNOW: We do not transform initial conditions of IVs which evolution functions are a polynomial of degree >= 2. */ if (tree_is_chrec (evolution_part)) return false; } return true; } /* Function vect_get_loop_niters. Determine how many iterations the loop is executed. If an expression that represents the number of iterations can be constructed, place it in NUMBER_OF_ITERATIONS. Return the loop exit condition. */ static tree vect_get_loop_niters (struct loop *loop, tree *number_of_iterations) { tree niters; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== get_loop_niters ==="); niters = number_of_iterations_in_loop (loop); if (niters != NULL_TREE && niters != chrec_dont_know) { *number_of_iterations = niters; if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "==> get_loop_niters:" ); print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM); } } return get_loop_exit_condition (loop); } /* Function vect_analyze_loop_form. Verify the following restrictions (some may be relaxed in the future): - it's an inner-most loop - number of BBs = 2 (which are the loop header and the latch) - the loop has a pre-header - the loop has a single entry and exit - the loop exit condition is simple enough, and the number of iterations can be analyzed (a countable loop). */ static loop_vec_info vect_analyze_loop_form (struct loop *loop) { loop_vec_info loop_vinfo; tree loop_cond; tree number_of_iterations = NULL; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_analyze_loop_form ==="); if (loop->inner) { if (vect_print_dump_info (REPORT_OUTER_LOOPS)) fprintf (vect_dump, "not vectorized: nested loop."); return NULL; } if (!loop->single_exit || loop->num_nodes != 2 || EDGE_COUNT (loop->header->preds) != 2) { if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) { if (!loop->single_exit) fprintf (vect_dump, "not vectorized: multiple exits."); else if (loop->num_nodes != 2) fprintf (vect_dump, "not vectorized: too many BBs in loop."); else if (EDGE_COUNT (loop->header->preds) != 2) fprintf (vect_dump, "not vectorized: too many incoming edges."); } return NULL; } /* We assume that the loop exit condition is at the end of the loop. i.e, that the loop is represented as a do-while (with a proper if-guard before the loop if needed), where the loop header contains all the executable statements, and the latch is empty. */ if (!empty_block_p (loop->latch)) { if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) fprintf (vect_dump, "not vectorized: unexpected loop form."); return NULL; } /* Make sure there exists a single-predecessor exit bb: */ if (!single_pred_p (loop->single_exit->dest)) { edge e = loop->single_exit; if (!(e->flags & EDGE_ABNORMAL)) { split_loop_exit_edge (e); if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "split exit edge."); } else { if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) fprintf (vect_dump, "not vectorized: abnormal loop exit edge."); return NULL; } } if (empty_block_p (loop->header)) { if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) fprintf (vect_dump, "not vectorized: empty loop."); return NULL; } loop_cond = vect_get_loop_niters (loop, &number_of_iterations); if (!loop_cond) { if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) fprintf (vect_dump, "not vectorized: complicated exit condition."); return NULL; } if (!number_of_iterations) { if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) fprintf (vect_dump, "not vectorized: number of iterations cannot be computed."); return NULL; } if (chrec_contains_undetermined (number_of_iterations)) { if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) fprintf (vect_dump, "Infinite number of iterations."); return false; } loop_vinfo = new_loop_vec_info (loop); LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) { if (vect_print_dump_info (REPORT_DETAILS)) { fprintf (vect_dump, "Symbolic number of iterations is "); print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS); } } else if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) fprintf (vect_dump, "not vectorized: number of iterations = 0."); return NULL; } LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond; return loop_vinfo; } /* Function vect_analyze_loop. Apply a set of analyses on LOOP, and create a loop_vec_info struct for it. The different analyses will record information in the loop_vec_info struct. */ loop_vec_info vect_analyze_loop (struct loop *loop) { bool ok; loop_vec_info loop_vinfo; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "===== analyze_loop_nest ====="); /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */ loop_vinfo = vect_analyze_loop_form (loop); if (!loop_vinfo) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "bad loop form."); return NULL; } /* Find all data references in the loop (which correspond to vdefs/vuses) and analyze their evolution in the loop. FORNOW: Handle only simple, array references, which alignment can be forced, and aligned pointer-references. */ ok = vect_analyze_data_refs (loop_vinfo); if (!ok) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "bad data references."); destroy_loop_vec_info (loop_vinfo); return NULL; } /* Classify all cross-iteration scalar data-flow cycles. Cross-iteration cycles caused by virtual phis are analyzed separately. */ vect_analyze_scalar_cycles (loop_vinfo); /* Data-flow analysis to detect stmts that do not need to be vectorized. */ ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); if (!ok) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "unexpected pattern."); destroy_loop_vec_info (loop_vinfo); return NULL; } ok = vect_determine_vectorization_factor (loop_vinfo); if (!ok) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "can't determine vectorization factor."); destroy_loop_vec_info (loop_vinfo); return NULL; } /* Analyze data dependences between the data-refs in the loop. FORNOW: fail at the first data dependence that we encounter. */ ok = vect_analyze_data_ref_dependences (loop_vinfo); if (!ok) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "bad data dependence."); destroy_loop_vec_info (loop_vinfo); return NULL; } /* Analyze the access patterns of the data-refs in the loop (consecutive, complex, etc.). FORNOW: Only handle consecutive access pattern. */ ok = vect_analyze_data_ref_accesses (loop_vinfo); if (!ok) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "bad data access."); destroy_loop_vec_info (loop_vinfo); return NULL; } /* Analyze the alignment of the data-refs in the loop. FORNOW: Only aligned accesses are handled. */ ok = vect_analyze_data_refs_alignment (loop_vinfo); if (!ok) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "bad data alignment."); destroy_loop_vec_info (loop_vinfo); return NULL; } /* Scan all the operations in the loop and make sure they are vectorizable. */ ok = vect_analyze_operations (loop_vinfo); if (!ok) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "bad operation or unsupported loop bound."); destroy_loop_vec_info (loop_vinfo); return NULL; } LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; return loop_vinfo; }