diff options
author | Dorit Naishlos <dorit@il.ibm.com> | 2004-11-19 19:39:40 +0000 |
---|---|---|
committer | Dorit Nuzman <dorit@gcc.gnu.org> | 2004-11-19 19:39:40 +0000 |
commit | 63dfe6ff6fdc78b68ce28e222f6fd9bb616199c9 (patch) | |
tree | 13edb3a9ae038b16cd0283198b9667757f2ba2fb /gcc | |
parent | 335d3d5495500663c42f1aec13694c48782a9996 (diff) | |
download | gcc-63dfe6ff6fdc78b68ce28e222f6fd9bb616199c9.zip gcc-63dfe6ff6fdc78b68ce28e222f6fd9bb616199c9.tar.gz gcc-63dfe6ff6fdc78b68ce28e222f6fd9bb616199c9.tar.bz2 |
re PR tree-optimization/18181 (vectorizer: problem in the peeling mechanism in the presence of loop invariants that are used after the loop)
PR tree-opt/18181
* tree-vectorizer.c (slpeel_tree_peel_loop_to_edge): Peeling scheme
changed to support uses-after-loop and to avoid creating flow paths
that shouldn't exist.
(slpeel_update_phi_nodes_for_guard): Takes additional two arguments.
Modified to fit the new peeling scheme. Avoid quadratic behavior.
(slpeel_add_loop_guard): Takes additional argument.
(slpeel_verify_cfg_after_peeling): New function.
(vect_update_ivs_after_vectorizer): Takes additional argument. Updated
documentation. Use 'exit-bb' instead of creating 'new-bb'.
(rename_variables_in_bb): Don't update phis for BBs out of loop, to fit
the new peeling scheme.
(copy_phi_nodes): Function removed. Its functionality moved to
update_phis_for_duplicate_loop.
(slpeel_update_phis_for_duplicate_loop): Functionality of copy_phi_nodes
moved here. Added documentation. Modified to fit the new peeling scheme.
(slpeel_make_loop_iterate_ntimes): Setting loop->single_exit is not
needed - done in slpeel_tree_peel_loop_to_edge.
(slpeel_tree_duplicate_loop_to_edge_cfg): Debug printouts compacted.
(vect_do_peeling_for_loop_bound): Add documentation. Call
slpeel_verify_cfg_after_peeling. Call vect_update_ivs_after_vectorizer
with additional argument.
(vect_do_peeling_for_alignment): Call slpeel_verify_cfg_after_peeling.
(vect_finish_stmt_generation): Avoid 80 column overflow.
From-SVN: r90932
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 28 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-85.c | 48 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-86.c | 48 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-87.c | 52 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-88.c | 52 | ||||
-rw-r--r-- | gcc/tree-vectorizer.c | 728 |
7 files changed, 651 insertions, 313 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index cf8c73b..9026a1d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,33 @@ 2004-11-19 Dorit Naishlos <dorit@il.ibm.com> + PR tree-opt/18181 + * tree-vectorizer.c (slpeel_tree_peel_loop_to_edge): Peeling scheme + changed to suppoer uses-after-loop and to void creating flow paths + that shouldn't exist. + (slpeel_update_phi_nodes_for_guard): Takes additional two arguments. + Modified to fit the new peeling scheme. Avoid quadratic behavior. + (slpeel_add_loop_guard): Takes additional argument. + (slpeel_verify_cfg_after_peeling): New function. + (vect_update_ivs_after_vectorizer): Takes additional argument. Updated + documentation. Use 'exit-bb' instead of creating 'new-bb'. + (rename_variables_in_bb): Don't update phis for BBs out of loop, to fit + the new peeling scheme. + (copy_phi_nodes): Function removed. Its functionality moved to + update_phis_for_duplicate_loop. + (slpeel_update_phis_for_duplicate_loop): Functionality of copy_phi_nodes + moved here. Added documentation. Modified to fit the new peeling scheme. + (slpeel_make_loop_iterate_ntimes): Setting loop->single_exit not not + needed - done in slpeel_tree_peel_loop_to_edge. + (slpeel_tree_duplicate_loop_to_edge_cfg): Debug printouts compacted. + (vect_do_peeling_for_loop_bound): Add documentation. Call + slpeel_verify_cfg_after_peeling. Call vect_update_ivs_after_vectorizer + with additional argument. + (vect_do_peeling_for_alignment): Call slpeel_verify_cfg_after_peeling. + + (vect_finish_stmt_generation): Avoid 80 column oveflow. + +2004-11-19 Dorit Naishlos <dorit@il.ibm.com> + * tree-vectorizer.c (slpeel_make_loop_iterate_ntimes): Last two arguments removed. 
(slpeel_tree_peel_loop_to_edge): Call slpeel_make_loop_iterate_ntimes diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ebd0d42..b08e056 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2004-11-19 Dorit Naishlos <dorit@il.ibm.com> + + PR tree-opt/18181 + * gcc.dg/vect/vect-85.c: New test. + * gcc.dg/vect/vect-86.c: New test. + * gcc.dg/vect/vect-87.c: New test. + * gcc.dg/vect/vect-88.c: New test. + 2004-11-19 Ben Elliston <bje@au.ibm.com> * gcc.dg/pr16286.c: Test __pixel and __bool keywords. diff --git a/gcc/testsuite/gcc.dg/vect/vect-85.c b/gcc/testsuite/gcc.dg/vect/vect-85.c new file mode 100644 index 0000000..26063e8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-85.c @@ -0,0 +1,48 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 16 + +typedef int aint __attribute__ ((__aligned__(16))); + +int main1 (int *a) +{ + int i, j, k; + int b[N]; + + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + k = i + N; + a[j] = k; + } + b[i] = k; + } + + + for (j = 0; j < N; j++) + if (a[j] != i + N - 1) + abort(); + + for (j = 0; j < N; j++) + if (b[j] != j + N) + abort(); + + return 0; +} + +int main (void) +{ + aint a[N]; + + check_vect (); + + main1 (a); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-86.c b/gcc/testsuite/gcc.dg/vect/vect-86.c new file mode 100644 index 0000000..9caa887 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-86.c @@ -0,0 +1,48 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 16 + +int main1 (int n) +{ + int i, j, k; + int a[N], b[N]; + + for (i = 0; i < n; i++) + { + for (j = 0; j < n; j++) + { + k = i + n; + a[j] = k; + } + b[i] = k; + } + + + for (j = 0; j < n; j++) + if (a[j] != i + n - 1) + abort(); + + for (i = 0; i < n; i++) + if (b[i] != i + n) + 
abort(); + + return 0; +} + +int main (void) +{ + check_vect (); + + main1 (N); + main1 (0); + main1 (1); + main1 (2); + main1 (N-1); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-87.c b/gcc/testsuite/gcc.dg/vect/vect-87.c new file mode 100644 index 0000000..546178f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-87.c @@ -0,0 +1,52 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 16 + +typedef int aint __attribute__ ((__aligned__(16))); + +int main1 (int n, int *a) +{ + int i, j, k; + int b[N]; + + for (i = 0; i < n; i++) + { + for (j = 0; j < n; j++) + { + k = i + n; + a[j] = k; + } + b[i] = k; + } + + + for (j = 0; j < n; j++) + if (a[j] != i + n - 1) + abort(); + + for (j = 0; j < n; j++) + if (b[j] != j + n) + abort(); + + return 0; +} + +int main (void) +{ + aint a[N]; + + check_vect (); + + main1 (N, a); + main1 (0, a); + main1 (1, a); + main1 (2, a); + main1 (N-1, a); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-88.c b/gcc/testsuite/gcc.dg/vect/vect-88.c new file mode 100644 index 0000000..59984ea --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-88.c @@ -0,0 +1,52 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 16 + +typedef int aint __attribute__ ((__aligned__(16))); + +int main1 (int n, int *a) +{ + int i, j, k; + int b[N]; + + for (i = 0; i < n; i++) + { + for (j = 0; j < n; j++) + { + k = i + n; + a[j] = k; + } + b[i] = k; + } + + + for (j = 0; j < n; j++) + if (a[j] != i + n - 1) + abort(); + + for (j = 0; j < n; j++) + if (b[j] != j + n) + abort(); + + return 0; +} + +int main (void) +{ + aint a[N+1]; + + check_vect (); + + main1 (N, a+1); + main1 (0, a+1); + main1 (1, a+1); + main1 (2, a+1); + main1 (N-1, a+1); + + return 0; +} + +/* { 
dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index a7cc59b..4430e18 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -164,10 +164,11 @@ static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, struct loops *, edge); static void slpeel_update_phis_for_duplicate_loop (struct loop *, struct loop *, bool after); -static void slpeel_update_phi_nodes_for_guard (edge, struct loop *); +static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool); static void slpeel_make_loop_iterate_ntimes (struct loop *, tree); -static edge slpeel_add_loop_guard (basic_block, tree, basic_block); +static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block); static bool slpeel_can_duplicate_loop_p (struct loop *, edge); +static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *); static void allocate_new_names (bitmap); static void rename_use_op (use_operand_p); static void rename_def_op (def_operand_p, tree); @@ -249,7 +250,7 @@ static void vect_finish_stmt_generation static void vect_generate_tmps_on_preheader (loop_vec_info, tree *, tree *, tree *); static tree vect_build_loop_niters (loop_vec_info); -static void vect_update_ivs_after_vectorizer (struct loop *, tree); +static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge); static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree); static void vect_update_inits_of_dr (struct data_reference *, struct loop *, tree niters); @@ -360,6 +361,7 @@ rename_variables_in_bb (basic_block bb) unsigned i; edge e; edge_iterator ei; + struct loop *loop = bb->loop_father; for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) rename_def_op (PHI_RESULT_PTR (phi), phi); @@ -398,8 +400,12 @@ rename_variables_in_bb (basic_block bb) } FOR_EACH_EDGE (e, ei, bb->succs) - for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi)) - rename_use_op 
(PHI_ARG_DEF_PTR_FROM_EDGE (phi, e)); + { + if (!flow_bb_inside_loop_p (loop, e->dest)) + continue; + for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi)) + rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e)); + } } @@ -441,165 +447,204 @@ rename_variables_in_loop (struct loop *loop) } -/* This function copies phis from LOOP header to - NEW_LOOP header. AFTER is as - in update_phis_for_duplicate_loop function. */ - -static void -copy_phi_nodes (struct loop *loop, struct loop *new_loop, - bool after) -{ - tree phi, new_phi, def; - edge new_e; - edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop)); - - /* Second add arguments to newly created phi nodes. */ - for (phi = phi_nodes (loop->header), - new_phi = phi_nodes (new_loop->header); - phi; - phi = PHI_CHAIN (phi), - new_phi = PHI_CHAIN (new_phi)) - { - new_e = loop_preheader_edge (new_loop); - def = PHI_ARG_DEF_FROM_EDGE (phi, e); - add_phi_arg (&new_phi, def, new_e); - } -} - +/* Update the PHI nodes of NEW_LOOP. -/* Update the PHI nodes of the NEW_LOOP. AFTER is true if the NEW_LOOP - executes after LOOP, and false if it executes before it. */ + NEW_LOOP is a duplicate of ORIG_LOOP. + AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP: + AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it + executes before it. */ static void -slpeel_update_phis_for_duplicate_loop (struct loop *loop, +slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop, struct loop *new_loop, bool after) { - edge old_latch; tree *new_name_ptr, new_ssa_name; - tree phi_new, phi_old, def; - edge orig_entry_e = loop_preheader_edge (loop); + tree phi_new, phi_orig; + tree def; + edge orig_loop_latch = loop_latch_edge (orig_loop); + edge orig_entry_e = loop_preheader_edge (orig_loop); + edge new_loop_exit_e = new_loop->exit_edges[0]; + edge new_loop_entry_e = loop_preheader_edge (new_loop); + edge entry_arg_e = (after ? 
orig_loop_latch : orig_entry_e); - /* Copy phis from loop->header to new_loop->header. */ - copy_phi_nodes (loop, new_loop, after); + /* + step 1. For each loop-header-phi: + Add the first phi argument for the phi in NEW_LOOP + (the one associated with the entry of NEW_LOOP) + + step 2. For each loop-header-phi: + Add the second phi argument for the phi in NEW_LOOP + (the one associated with the latch of NEW_LOOP) + + step 3. Update the phis in the successor block of NEW_LOOP. + + case 1: NEW_LOOP was placed before ORIG_LOOP: + The successor block of NEW_LOOP is the header of ORIG_LOOP. + Updating the phis in the successor block can therefore be done + along with the scanning of the loop header phis, because the + header blocks of ORIG_LOOP and NEW_LOOP have exactly the same + phi nodes, organized in the same order. + + case 2: NEW_LOOP was placed after ORIG_LOOP: + The successor block of NEW_LOOP is the original exit block of + ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis. + We postpone updating these phis to a later stage (when + loop guards are added). + */ - old_latch = loop_latch_edge (loop); - /* Update PHI args for the new loop latch edge, and - the old loop preheader edge, we know that the PHI nodes - are ordered appropriately in copy_phi_nodes. */ + /* Scan the phis in the headers of the old and new loops + (they are organized in exactly the same order). */ + for (phi_new = phi_nodes (new_loop->header), - phi_old = phi_nodes (loop->header); - phi_new && phi_old; - phi_new = PHI_CHAIN (phi_new), phi_old = PHI_CHAIN (phi_old)) + phi_orig = phi_nodes (orig_loop->header); + phi_new && phi_orig; + phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig)) { - def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch); + /* step 1. */ + def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e); + add_phi_arg (&phi_new, def, new_loop_entry_e); + /* step 2. 
*/ + def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch); if (TREE_CODE (def) != SSA_NAME) - continue; + continue; new_name_ptr = SSA_NAME_AUX (def); - - /* Something defined outside of the loop. */ if (!new_name_ptr) - continue; + /* Something defined outside of the loop. */ + continue; /* An ordinary ssa name defined in the loop. */ new_ssa_name = *new_name_ptr; + add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge (new_loop)); - add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge(new_loop)); - - /* Update PHI args for the original loop pre-header edge. */ - if (! after) - SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e), - new_ssa_name); + /* step 3 (case 1). */ + if (!after) + { + gcc_assert (new_loop_exit_e == orig_entry_e); + SET_PHI_ARG_DEF (phi_orig, + phi_arg_from_edge (phi_orig, new_loop_exit_e), + new_ssa_name); + } } } /* Update PHI nodes for a guard of the LOOP. - LOOP is supposed to have a preheader bb at which a guard condition is - located. The true edge of this condition skips the LOOP and ends - at the destination of the (unique) LOOP exit. The loop exit bb is supposed - to be an empty bb (created by this transformation) with one successor. - - This function creates phi nodes at the LOOP exit bb. These phis need to be - created as a result of adding true edge coming from guard. - - FORNOW: Only phis which have corresponding phi nodes at the header of the - LOOP are created. Here we use the assumption that after the LOOP there - are no uses of defs generated in LOOP. 
- - After the phis creation, the function updates the values of phi nodes at - the LOOP exit successor bb: - - Original loop: - - bb0: loop preheader - goto bb1 - bb1: loop header - if (exit_cond) goto bb3 else goto bb2 - bb2: loop latch - goto bb1 - bb3: - - - After guard creation (the loop before this function): - - bb0: loop preheader - if (guard_condition) goto bb4 else goto bb1 - bb1: loop header - if (exit_cond) goto bb4 else goto bb2 - bb2: loop latch - goto bb1 - bb4: loop exit - (new empty bb) - goto bb3 - bb3: - - This function updates the phi nodes in bb4 and in bb3, to account for the - new edge from bb0 to bb4. */ + Input: + - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that + controls whether LOOP is to be executed. GUARD_EDGE is the edge that + originates from the guard-bb, skips LOOP and reaches the (unique) exit + bb of LOOP. This loop-exit-bb is an empty bb with one successor. + We denote this bb NEW_MERGE_BB because it had a single predecessor (the + LOOP header) before the guard code was added, and now it became a merge + point of two paths - the path that ends with the LOOP exit-edge, and + the path that ends with GUARD_EDGE. + + This function creates and updates the relevant phi nodes to account for + the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB: + 1. Create phi nodes at NEW_MERGE_BB. + 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted + UPDATE_BB). 
UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB + was added: + + ===> The CFG before the guard-code was added: + LOOP_header_bb: + if (exit_loop) goto update_bb : LOOP_header_bb + update_bb: + + ==> The CFG after the guard-code was added: + guard_bb: + if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb + LOOP_header_bb: + if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb + new_merge_bb: + goto update_bb + update_bb: + + - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in + UPDATE_BB are loop entry phis, like the phis in the LOOP header, + organized in the same order. + If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are + loop exit phis. + + - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another + "original" loop). FALSE if LOOP is an original loop (not a newly + created copy). The SSA_NAME_AUX fields of the defs in the origianl + loop are the corresponding new ssa-names used in the new duplicated + loop copy. IS_NEW_LOOP indicates which of the two args of the phi + nodes in UPDATE_BB takes the original ssa-name, and which takes the + new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with + the LOOP-exit-edge takes the new-name, and the phi-arg that is + associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is + FALSE, it's the other way around. + */ static void -slpeel_update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop) +slpeel_update_phi_nodes_for_guard (edge guard_edge, + struct loop *loop, + bool entry_phis, + bool is_new_loop) { - tree phi, phi1; - basic_block bb = loop->exit_edges[0]->dest; - - for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi)) - { - tree new_phi; - tree phi_arg; - - /* Generate new phi node. 
*/ - new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), bb); + tree orig_phi, new_phi, update_phi; + tree guard_arg, loop_arg; + basic_block new_merge_bb = guard_edge->dest; + edge e = EDGE_SUCC (new_merge_bb, 0); + basic_block update_bb = e->dest; + basic_block orig_bb = (entry_phis ? loop->header : update_bb); + + for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb); + orig_phi && update_phi; + orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi)) + { + /* 1. Generate new phi node in NEW_MERGE_BB: */ + new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), + new_merge_bb); + + /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge + of LOOP. Set the two phi args in NEW_PHI for these edges: */ + if (entry_phis) + { + loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, + EDGE_SUCC (loop->latch, 0)); + guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]); + } + else /* exit phis */ + { + tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); + tree *new_name_ptr = SSA_NAME_AUX (orig_def); + tree new_name; + + if (new_name_ptr) + new_name = *new_name_ptr; + else + /* Something defined outside of the loop */ + new_name = orig_def; + + if (is_new_loop) + { + guard_arg = orig_def; + loop_arg = new_name; + } + else + { + guard_arg = new_name; + loop_arg = orig_def; + } + } + add_phi_arg (&new_phi, loop_arg, loop->exit_edges[0]); + add_phi_arg (&new_phi, guard_arg, guard_edge); - /* Add argument coming from guard true edge. */ - phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]); - add_phi_arg (&new_phi, phi_arg, guard_true_edge); + /* 3. Update phi in successor block. */ + gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg + || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg); + SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e), + PHI_RESULT (new_phi)); + } - /* Add argument coming from loop exit edge. 
*/ - phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)); - add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]); - - /* Update all phi nodes at the loop exit successor. */ - for (phi1 = phi_nodes (EDGE_SUCC (bb, 0)->dest); - phi1; - phi1 = PHI_CHAIN (phi1)) - { - tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1, EDGE_SUCC (bb, 0)); - if (old_arg == phi_arg) - { - edge e = EDGE_SUCC (bb, 0); - - SET_PHI_ARG_DEF (phi1, - phi_arg_from_edge (phi1, e), - PHI_RESULT (new_phi)); - } - } - } - - set_phi_nodes (bb, phi_reverse (phi_nodes (bb))); + set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb))); } @@ -618,8 +663,6 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters) tree begin_label = tree_block_label (loop->latch); tree exit_label = tree_block_label (loop->single_exit->dest); - /* Flow loop scan does not update loop->single_exit field. */ - loop->single_exit = loop->exit_edges[0]; orig_cond = get_loop_exit_condition (loop); gcc_assert (orig_cond); create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop, @@ -630,7 +673,6 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters) bsi_next (&loop_exit_bsi); gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond); - if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */ cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters); else /* 'then' edge loops back. 
*/ @@ -670,8 +712,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops, if (!at_exit && e != loop_preheader_edge (loop)) { if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, - "Edge is not an entry nor an exit edge.\n"); + fprintf (dump_file, "Edge is not an entry nor an exit edge.\n"); return NULL; } @@ -681,8 +722,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops, if (!can_copy_bbs_p (bbs, loop->num_nodes)) { if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, - "Cannot copy basic blocks.\n"); + fprintf (dump_file, "Cannot copy basic blocks.\n"); free (bbs); return NULL; } @@ -692,8 +732,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops, if (!new_loop) { if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, - "The duplicate_loop returns NULL.\n"); + fprintf (dump_file, "duplicate_loop returns NULL.\n"); free (bbs); return NULL; } @@ -776,7 +815,8 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops, Returns the skip edge. */ static edge -slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb) +slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb, + basic_block dom_bb) { block_stmt_iterator bsi; edge new_e, enter_e; @@ -796,7 +836,7 @@ slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb) bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT); /* Add new edge to connect entry block to the second loop. */ new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE); - set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb); + set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb); return new_e; } @@ -837,61 +877,77 @@ slpeel_can_duplicate_loop_p (struct loop *loop, edge e) } -/* Given LOOP this function duplicates it to the edge E. - - This transformation takes place before the loop is vectorized. 
- For now, there are two main cases when it's used - by the vectorizer: to support loops with unknown loop bounds - (or loop bounds indivisible by vectorization factor) and to force the - alignment of data references in the loop. In the first case, LOOP is - duplicated to the exit edge, producing epilog loop. In the second case, LOOP - is duplicated to the preheader edge thus generating prolog loop. In both - cases, the original loop will be vectorized after the transformation. - - The edge E is supposed to be either preheader edge of the LOOP or - its exit edge. If preheader edge is specified, the LOOP copy - will precede the original one. Otherwise the copy will be located - at the exit of the LOOP. +static void +slpeel_verify_cfg_after_peeling (struct loop *first_loop, + struct loop *second_loop) +{ + basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest; + basic_block loop2_entry_bb = second_loop->pre_header; + basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src; + + /* A guard that controls whether the second_loop is to be executed or skipped + is placed in first_loop->exit. first_loopt->exit therefore has two + successors - one is the preheader of second_loop, and the other is a bb + after second_loop. + */ + gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2); - FIRST_NITERS (SSA_NAME) parameter specifies how many times to iterate - the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first - loop will be iterated FIRST_NITERS times by introducing additional - induction variable and replacing loop exit condition. If - UPDATE_FIRST_LOOP_COUNT is true no change to the first loop is made and - the caller to tree_duplicate_loop_to_edge is responsible for updating - the first loop count. - NITERS (also SSA_NAME) parameter defines the number of iteration the - original loop iterated. The function generates two if-then guards: - one prior to the first loop and the other prior to the second loop. 
- The first guard will be: - - if (FIRST_NITERS == 0) then skip the first loop + /* 1. Verify that one of the successors of first_loopt->exit is the preheader + of second_loop. */ - The second guard will be: + /* The preheader of new_loop is expected to have two predessors: + first_loop->exit and the block that precedes first_loop. */ + + gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2 + && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb + && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb) + || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb + && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb))); + + /* Verify that the other successor of first_loopt->exit is after the + second_loop. */ + /* TODO */ +} - if (FIRST_NITERS == NITERS) then skip the second loop - Thus the equivalence to the original code is guaranteed by correct values - of NITERS and FIRST_NITERS and generation of if-then loop guards. +/* Function slpeel_tree_peel_loop_to_edge. - For now this function supports only loop forms that are candidate for - vectorization. Such types are the following: + Peel the first (last) iterations of LOOP into a new prolog (epilog) loop + that is placed on the entry (exit) edge E of LOOP. After this transformation + we have two loops one after the other - first-loop iterates FIRST_NITERS + times, and second-loop iterates the remainder NITERS - FIRST_NITERS times. - (1) only innermost loops - (2) loops built from 2 basic blocks - (3) loops with one entry and one exit - (4) loops without function calls - (5) loops without defs that are used after the loop + Input: + - LOOP: the loop to be peeled. + - E: the exit or entry edge of LOOP. + If it is the entry edge, we peel the first iterations of LOOP. In this + case first-loop is LOOP, and second-loop is the newly created loop. + If it is the exit edge, we peel the last iterations of LOOP. In this + case, first-loop is the newly created loop, and second-loop is LOOP. 
+ - NITERS: the number of iterations that LOOP iterates. + - FIRST_NITERS: the number of iterations that the first-loop should iterate. + - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responssible + for updating the loop bound of the first-loop to FIRST_NITERS. If it + is false, the caller of this function may want to take care of this + (this can be usefull is we don't want new stmts added to first-loop). - (1), (3) are checked in this function; (2) - in function - vect_analyze_loop_form; (4) - in function vect_analyze_data_refs; - (5) is checked as part of the function vect_mark_stmts_to_be_vectorized, - when excluding induction/reduction support. + Output: + The function returns a pointer to the new loop-copy, or NULL if it failed + to perform the trabsformation. + + The function generates two if-then-else guards: one before the first loop, + and the other before the second loop: + The first guard is: + if (FIRST_NITERS == 0) then skip the first loop, + and go directly to the second loop. + The second guard is: + if (FIRST_NITERS == NITERS) then skip the second loop. + + FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p). + FORNOW the resulting code will not be in loop-closed-ssa form. +*/ - The function returns NULL in case one of these checks or - transformations failed. */ - struct loop* slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, edge e, tree first_niters, @@ -901,117 +957,151 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, edge skip_e; tree pre_condition; bitmap definitions; - basic_block first_exit_bb, second_exit_bb; - basic_block pre_header_bb; + basic_block bb_before_second_loop, bb_after_second_loop; + basic_block bb_before_first_loop; + basic_block bb_between_loops; edge exit_e = loop->exit_edges [0]; - + if (!slpeel_can_duplicate_loop_p (loop, e)) return NULL; - - /* We have to initialize cfg_hooks. Then, when calling + + /* We have to initialize cfg_hooks. 
Then, when calling cfg_hooks->split_edge, the function tree_split_edge - is actually called and, when calling cfg_hooks->duplicate_block, + is actually called and, when calling cfg_hooks->duplicate_block, the function tree_duplicate_bb is called. */ tree_register_cfg_hooks (); - /* 1. Generate a copy of LOOP and put it on E (entry or exit). */ + + /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP). + Resulting CFG would be: + + first_loop: + do { + } while ... + + second_loop: + do { + } while ... + + orig_exit_bb: + */ + if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e))) { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, - "The tree_duplicate_loop_to_edge_cfg failed.\n"); + if (vect_debug_stats (loop) || vect_debug_details (loop)) + fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n"); return NULL; } - - definitions = marked_ssa_names (); - allocate_new_names (definitions); - slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e); - /* Here, using assumption (5), we do not propagate new names further - than on phis of the exit from the second loop. */ - rename_variables_in_loop (new_loop); - free_new_names (definitions); - + if (e == exit_e) { + /* NEW_LOOP was placed after LOOP. */ first_loop = loop; second_loop = new_loop; } - else + else { + /* NEW_LOOP was placed before LOOP. */ first_loop = new_loop; second_loop = loop; } - /* 2. Generate bb between the loops. */ - first_exit_bb = split_edge (first_loop->exit_edges[0]); - add_bb_to_loop (first_exit_bb, first_loop->outer); + definitions = marked_ssa_names (); + allocate_new_names (definitions); + slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e); + rename_variables_in_loop (new_loop); + - /* We need to update here first loop exit edge - and second loop preheader edge. 
*/ - flow_loop_scan (first_loop, LOOP_ALL); - flow_loop_scan (second_loop, LOOP_ALL); - /* Flow loop scan does not update loop->single_exit field. */ - first_loop->single_exit = first_loop->exit_edges[0]; - second_loop->single_exit = second_loop->exit_edges[0]; + /* 2. Add the guard that controls whether the first loop is executed. + Resulting CFG would be: - /* 3. Make first loop iterate FIRST_NITERS times, if needed. */ - if (!update_first_loop_count) - slpeel_make_loop_iterate_ntimes (first_loop, first_niters); - - /* 4. Add the guard before first loop: + bb_before_first_loop: + if (FIRST_NITERS == 0) GOTO bb_before_second_loop + GOTO first-loop + + first_loop: + do { + } while ... - if FIRST_NITERS == 0 - skip first loop - else - enter first loop */ + bb_before_second_loop: - /* 4a. Generate bb before first loop. */ - pre_header_bb = split_edge (loop_preheader_edge (first_loop)); - add_bb_to_loop (pre_header_bb, first_loop->outer); + second_loop: + do { + } while ... - /* First loop preheader edge is changed. */ + orig_exit_bb: + */ + + bb_before_first_loop = split_edge (loop_preheader_edge (first_loop)); + add_bb_to_loop (bb_before_first_loop, first_loop->outer); + bb_before_second_loop = split_edge (first_loop->exit_edges[0]); + add_bb_to_loop (bb_before_second_loop, first_loop->outer); flow_loop_scan (first_loop, LOOP_ALL); + flow_loop_scan (second_loop, LOOP_ALL); + + pre_condition = + build (LE_EXPR, boolean_type_node, first_niters, integer_zero_node); + skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition, + bb_before_second_loop, bb_before_first_loop); + slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */, + first_loop == new_loop); + - /* 4b. Generate guard condition. */ - pre_condition = build (LE_EXPR, boolean_type_node, - first_niters, integer_zero_node); + /* 3. Add the guard that controls whether the second loop is executed. + Resulting CFG would be: - /* 4c. Add condition at the end of preheader bb. 
*/ - skip_e = slpeel_add_loop_guard (pre_header_bb, pre_condition, first_exit_bb); + bb_before_first_loop: + if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop) + GOTO first-loop - /* 4d. Update phis at first loop exit and propagate changes - to the phis of second loop. */ - slpeel_update_phi_nodes_for_guard (skip_e, first_loop); + first_loop: + do { + } while ... - /* 5. Add the guard before second loop: + bb_between_loops: + if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop) + GOTO bb_before_second_loop - if FIRST_NITERS == NITERS SKIP - skip second loop - else - enter second loop */ + bb_before_second_loop: - /* 5a. Generate empty bb at the exit from the second loop. */ - second_exit_bb = split_edge (second_loop->exit_edges[0]); - add_bb_to_loop (second_exit_bb, second_loop->outer); + second_loop: + do { + } while ... - /* Second loop preheader edge is changed. */ + bb_after_second_loop: + + orig_exit_bb: + */ + + bb_between_loops = split_edge (first_loop->exit_edges[0]); + add_bb_to_loop (bb_between_loops, first_loop->outer); + bb_after_second_loop = split_edge (second_loop->exit_edges[0]); + add_bb_to_loop (bb_after_second_loop, second_loop->outer); + flow_loop_scan (first_loop, LOOP_ALL); flow_loop_scan (second_loop, LOOP_ALL); - /* 5b. Generate guard condition. */ - pre_condition = build (EQ_EXPR, boolean_type_node, - first_niters, niters); + pre_condition = build (EQ_EXPR, boolean_type_node, first_niters, niters); + skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, + bb_after_second_loop, bb_before_first_loop); + slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */, + second_loop == new_loop); - /* 5c. Add condition at the end of preheader bb. */ - skip_e = slpeel_add_loop_guard (first_exit_bb, pre_condition, second_exit_bb); - slpeel_update_phi_nodes_for_guard (skip_e, second_loop); + /* Flow loop scan does not update loop->single_exit field. 
*/ + first_loop->single_exit = first_loop->exit_edges[0]; + second_loop->single_exit = second_loop->exit_edges[0]; + /* 4. Make first-loop iterate FIRST_NITERS times, if requested. + */ + if (update_first_loop_count) + slpeel_make_loop_iterate_ntimes (first_loop, first_niters); + + free_new_names (definitions); BITMAP_XFREE (definitions); unmark_all_for_rewrite (); - + return new_loop; } - /* Here the proper Vectorizer starts. */ @@ -2094,7 +2184,8 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi) /* Make sure bsi points to the stmt that is being vectorized. */ /* Assumption: any stmts created for the vectorization of stmt S were - inserted before S. BSI is expected to point to S or some new stmt before S. */ + inserted before S. BSI is expected to point to S or some new stmt before S. + */ while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi)) bsi_next (bsi); @@ -2837,24 +2928,13 @@ vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters) of LOOP were peeled. - NITERS - the number of iterations that LOOP executes (before it is vectorized). i.e, the number of times the ivs should be bumped. + - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path + coming out from LOOP on which there are uses of the LOOP ivs + (this is the path from LOOP->exit to epilog_loop->preheader). - We have: - - bb_before_loop: - if (guard-cond) GOTO bb_before_epilog_loop - else GOTO loop - - loop: - do { - } while ... - - bb_before_epilog_loop: - - bb_before_epilog_loop has edges coming in form the loop exit and - from bb_before_loop. New definitions for ivs will be placed on the edge - from loop->exit to bb_before_epilog_loop. This also requires that we update - the phis in bb_before_epilog_loop. (In the code this bb is denoted - "update_bb"). + The new definitions of the ivs are placed in LOOP->exit. + The phi args associated with the edge UPDATE_E in the bb + UPDATE_E->dest are updated accordingly. 
Assumption 1: Like the rest of the vectorizer, this function assumes a single loop exit that has a single predecessor. @@ -2864,23 +2944,26 @@ vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters) Assumption 3: The access function of the ivs is simple enough (see vect_can_advance_ivs_p). This assumption will be relaxed in the future. + + Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path + coming out of LOOP on which the ivs of LOOP are used (this is the path + that leads to the epilog loop; other paths skip the epilog loop). This + path starts with the edge UPDATE_E, and its destination (denoted update_bb) + needs to have its phis updated. */ static void -vect_update_ivs_after_vectorizer (struct loop *loop, tree niters) +vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e) { - edge exit = loop->exit_edges[0]; + basic_block exit_bb = loop->exit_edges[0]->dest; tree phi, phi1; - basic_block update_bb = exit->dest; - edge update_e; + basic_block update_bb = update_e->dest; - /* Generate basic block at the exit from the loop. */ - basic_block new_bb = split_edge (exit); + /* gcc_assert (vect_can_advance_ivs_p (loop)); */ + + /* Make sure there exists a single-predecessor exit bb: */ + gcc_assert (EDGE_COUNT (exit_bb->preds) == 1); - add_bb_to_loop (new_bb, EDGE_SUCC (new_bb, 0)->dest->loop_father); - loop->exit_edges[0] = EDGE_PRED (new_bb, 0); - update_e = EDGE_SUCC (new_bb, 0); - for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb); phi && phi1; phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1)) @@ -2892,9 +2975,7 @@ vect_update_ivs_after_vectorizer (struct loop *loop, tree niters) tree var, stmt, ni, ni_name; block_stmt_iterator last_bsi; - /* Skip virtual phi's. The data dependences that are associated with - virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ - + /* Skip virtual phi's. 
*/ if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) { if (vect_debug_details (NULL)) @@ -2906,10 +2987,10 @@ vect_update_ivs_after_vectorizer (struct loop *loop, tree niters) gcc_assert (access_fn); evolution_part = unshare_expr (evolution_part_in_loop_num (access_fn, loop->num)); + gcc_assert (evolution_part != NULL_TREE); - /* FORNOW: We do not transform initial conditions of IVs - which evolution functions are a polynomial of degree >= 2 or - exponential. */ + /* FORNOW: We do not support IVs whose evolution function is a polynomial + of degree >= 2 or exponential. */ gcc_assert (!tree_is_chrec (evolution_part)); step_expr = evolution_part; @@ -2924,12 +3005,12 @@ vect_update_ivs_after_vectorizer (struct loop *loop, tree niters) ni_name = force_gimple_operand (ni, &stmt, false, var); - /* Insert stmt into new_bb. */ - last_bsi = bsi_last (new_bb); + /* Insert stmt into exit_bb. */ + last_bsi = bsi_last (exit_bb); if (stmt) - bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT); + bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT); - /* Fix phi expressions in duplicated loop. */ + /* Fix phi expressions in the successor bb. */ gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) == PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0))); SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name); @@ -2937,21 +3018,28 @@ vect_update_ivs_after_vectorizer (struct loop *loop, tree niters) } -/* This function is the main driver of transformation - to be done for loop before vectorizing it in case of - unknown loop bound. */ +/* Function vect_do_peeling_for_loop_bound + + Peel the last iterations of the loop represented by LOOP_VINFO. + The peeled iterations form a new epilog loop. Given that the loop now + iterates NITERS times, the new epilog loop iterates + NITERS % VECTORIZATION_FACTOR times. + + The original loop will later be made to iterate + NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). 
*/ static void -vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree * ratio, +vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, struct loops *loops) { tree ni_name, ratio_mult_vf_name; + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + struct loop *new_loop; + edge update_e; #ifdef ENABLE_CHECKING int loop_num; #endif - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - struct loop *new_loop; if (vect_debug_details (NULL)) fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n"); @@ -2972,23 +3060,32 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree * ratio, loop_num = loop->num; #endif new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0], - ratio_mult_vf_name, ni_name, true); + ratio_mult_vf_name, ni_name, false); #ifdef ENABLE_CHECKING gcc_assert (new_loop); gcc_assert (loop_num == loop->num); + slpeel_verify_cfg_after_peeling (loop, new_loop); #endif + /* A guard that controls whether the new_loop is to be executed or skipped + is placed in LOOP->exit. LOOP->exit therefore has two successors - one + is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other + is a bb after NEW_LOOP, where these IVs are not used. Find the edge that + is on the path where the LOOP IVs are used and need to be updated. */ + + if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest) + update_e = EDGE_PRED (new_loop->pre_header, 0); + else + update_e = EDGE_PRED (new_loop->pre_header, 1); + /* Update IVs of original loop as if they were advanced by ratio_mult_vf_name steps. */ + vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e); -#ifdef ENABLE_CHECKING - /* Check existence of intermediate bb. */ - gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header); -#endif - vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name); + /* After peeling we have to reset scalar evolution analyzer. 
*/ + scev_reset (); return; - } @@ -3133,6 +3230,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops) struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree niters_of_prolog_loop, ni_name; tree n_iters; + struct loop *new_loop; if (vect_debug_details (NULL)) fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n"); @@ -3140,17 +3238,21 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops) ni_name = vect_build_loop_niters (loop_vinfo); niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name); - /* Peel the prolog loop and iterate it niters_of_prolog_loop. */ - slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge(loop), - niters_of_prolog_loop, ni_name, false); + new_loop = + slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop), + niters_of_prolog_loop, ni_name, true); +#ifdef ENABLE_CHECKING + gcc_assert (new_loop); + slpeel_verify_cfg_after_peeling (new_loop, loop); +#endif /* Update number of times loop executes. */ n_iters = LOOP_VINFO_NITERS (loop_vinfo); LOOP_VINFO_NITERS (loop_vinfo) = build (MINUS_EXPR, integer_type_node, n_iters, niters_of_prolog_loop); - /* Update all inits of access functions of all data refs. */ + /* Update the init conditions of the access functions of all data refs. */ vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop); /* After peeling we have to reset scalar evolution analyzer. */ |