Diffstat (limited to 'gcc')
-rw-r--r--   gcc/ChangeLog        |  15
-rw-r--r--   gcc/tree-cfg.c       | 125
-rw-r--r--   gcc/tree-parloops.c  |  23
3 files changed, 54 insertions, 109 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7df2ce2..147a8a2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+28-01-2010  Razya Ladelsky  <razya@il.ibm.com>
+
+	* tree-parloops.c (transform_to_exit_first_loop): Update the basic
+	block list passed to gimple_duplicate_sese_tail.
+	(parallelize_loops): Avoid parallelization when the function
+	has_nonlocal_label.
+	Avoid parallelization when the preheader is IRREDUCIBLE.
+	Try to optimize when estimated_loop_iterations_int is unresolved.
+	Add the loop's location to the dump file.
+	* tree-cfg.c(add_phi_args_after_redirect): Remove.
+	(gimple_duplicate_sese_tail): Remove the check for the latch.
+	Redirect nexits to the exit block.
+	Remove handling of the incoming edges to the latch.
+	Redirect the backedge from the copied latch to the exit bb.
+
 2010-01-28  Michael Matz  <matz@suse.de>
 
 	PR target/42881
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index d927698..361b46e 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4927,31 +4927,6 @@ gimple_duplicate_bb (basic_block bb)
   return new_bb;
 }
 
-/* Add phi arguments to the phi nodes in E_COPY->dest according to
-   the phi arguments coming from the equivalent edge at
-   the phi nodes of DEST.  */
-
-static void
-add_phi_args_after_redirect (edge e_copy, edge orig_e)
-{
-  gimple_stmt_iterator psi, psi_copy;
-  gimple phi, phi_copy;
-  tree def;
-
-  for (psi = gsi_start_phis (orig_e->dest),
-       psi_copy = gsi_start_phis (e_copy->dest);
-       !gsi_end_p (psi);
-       gsi_next (&psi), gsi_next (&psi_copy))
-    {
-
-      phi = gsi_stmt (psi);
-      phi_copy = gsi_stmt (psi_copy);
-      def = PHI_ARG_DEF_FROM_EDGE (phi, orig_e);
-      add_phi_arg (phi_copy, def, e_copy,
-		   gimple_phi_arg_location_from_edge (phi, orig_e));
-    }
-}
-
 /* Adds phi node arguments for edge E_COPY after basic block duplication.  */
 
 static void
@@ -5235,12 +5210,13 @@ gimple_duplicate_sese_tail (edge entry ATTRIBUTE_UNUSED, edge exit ATTRIBUTE_UNU
   edge exits[2], nexits[2], e;
   gimple_stmt_iterator gsi,gsi1;
   gimple cond_stmt;
-  edge sorig, snew, orig_e;
+  edge sorig, snew;
   basic_block exit_bb;
-  edge_iterator ei;
-  VEC (edge, heap) *redirect_edges;
-  basic_block iters_bb, orig_src;
+  basic_block iters_bb;
   tree new_rhs;
+  gimple_stmt_iterator psi;
+  gimple phi;
+  tree def;
 
   gcc_assert (EDGE_COUNT (exit->src->succs) == 2);
   exits[0] = exit;
@@ -5249,17 +5225,6 @@ gimple_duplicate_sese_tail (edge entry ATTRIBUTE_UNUSED, edge exit ATTRIBUTE_UNU
   if (!can_copy_bbs_p (region, n_region))
     return false;
 
-  /* Some sanity checking.  Note that we do not check for all possible
-     missuses of the functions.  I.e. if you ask to copy something weird
-     (e.g., in the example, if there is a jump from inside to the middle
-     of some_code, or come_code defines some of the values used in cond)
-     it will work, but the resulting code will not be correct.  */
-  for (i = 0; i < n_region; i++)
-    {
-      if (region[i] == orig_loop->latch)
-	return false;
-    }
-
   initialize_original_copy_tables ();
   set_loop_copy (orig_loop, loop);
   duplicate_subloops (orig_loop, loop);
@@ -5377,72 +5342,30 @@ gimple_duplicate_sese_tail (edge entry ATTRIBUTE_UNUSED, edge exit ATTRIBUTE_UNU
   e = redirect_edge_and_branch (exits[0], exits[1]->dest);
   PENDING_STMT (e) = NULL;
 
-  /* If the block consisting of the exit condition has the latch as
-     successor, then the body of the loop is executed before
-     the exit condition is tested.
-
-     { body }
-     { cond } (exit[0])  -> { latch }
-	|
-	V (exit[1])
-
-     { exit_bb }
-
-
-     In such case, the equivalent copied edge nexits[1]
-     (for the peeled iteration) needs to be redirected to exit_bb.
-
-     Otherwise,
-
-     { cond } (exit[0])  -> { body }
-	|
-	V (exit[1])
-
-     { exit_bb }
-
-
-     exit[0] is pointing to the body of the loop,
-     and the equivalent nexits[0] needs to be redirected to
-     the copied body (of the peeled iteration).  */
-
-  if (exits[1]->dest == orig_loop->latch)
-    e = redirect_edge_and_branch (nexits[1], nexits[0]->dest);
-  else
-    e = redirect_edge_and_branch (nexits[0], nexits[1]->dest);
-  PENDING_STMT (e) = NULL;
-
-  redirect_edges = VEC_alloc (edge, heap, 10);
-
-  for (i = 0; i < n_region; i++)
-    region_copy[i]->flags |= BB_DUPLICATED;
-
-  /* Iterate all incoming edges to latch.  All those coming from
-     copied bbs will be redirected to exit_bb.  */
-  FOR_EACH_EDGE (e, ei, orig_loop->latch->preds)
-    {
-      if (e->src->flags & BB_DUPLICATED)
-	VEC_safe_push (edge, heap, redirect_edges, e);
-    }
-
+  /* The latch of ORIG_LOOP was copied, and so was the backedge
+     to the original header.  We redirect this backedge to EXIT_BB.  */
   for (i = 0; i < n_region; i++)
-    region_copy[i]->flags &= ~BB_DUPLICATED;
-
-  for (i = 0; VEC_iterate (edge, redirect_edges, i, e); ++i)
-    {
-      e = redirect_edge_and_branch (e, exit_bb);
-      PENDING_STMT (e) = NULL;
-      orig_src = get_bb_original (e->src);
-      orig_e = find_edge (orig_src, orig_loop->latch);
-      add_phi_args_after_redirect (e, orig_e);
-    }
-
-  VEC_free (edge, heap, redirect_edges);
-
+    if (get_bb_original (region_copy[i]) == orig_loop->latch)
+      {
+	gcc_assert (single_succ_edge (region_copy[i]));
+	e = redirect_edge_and_branch (single_succ_edge (region_copy[i]), exit_bb);
+	PENDING_STMT (e) = NULL;
+	for (psi = gsi_start_phis (exit_bb);
+	     !gsi_end_p (psi);
+	     gsi_next (&psi))
+	  {
+	    phi = gsi_stmt (psi);
+	    def = PHI_ARG_DEF (phi, nexits[0]->dest_idx);
+	    add_phi_arg (phi, def, e, gimple_phi_arg_location_from_edge (phi, e));
+	  }
+      }
+  e = redirect_edge_and_branch (nexits[0], nexits[1]->dest);
+  PENDING_STMT (e) = NULL;
+
   /* Anything that is outside of the region, but was dominated by something
      inside needs to update dominance info.  */
   iterate_fix_dominators (CDI_DOMINATORS, doms, false);
   VEC_free (basic_block, heap, doms);
-
   /* Update the SSA web.  */
   update_ssa (TODO_update_ssa);
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index cf01970..885a713 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -1329,7 +1329,6 @@ transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit
 
   for (n = 0; bbs[n] != loop->latch; n++)
     continue;
-  n--;
   nbbs = XNEWVEC (basic_block, n);
   ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				    bbs + 1, n, nbbs);
@@ -1884,10 +1883,14 @@ parallelize_loops (void)
   struct tree_niter_desc niter_desc;
   loop_iterator li;
   htab_t reduction_list;
-
+  HOST_WIDE_INT estimated;
+  LOC loop_loc;
+
   /* Do not parallelize loops in the functions created by parallelization.  */
   if (parallelized_function_p (cfun->decl))
     return false;
+  if (cfun->has_nonlocal_label)
+    return false;
 
   reduction_list = htab_create (10, reduction_info_hash,
				     reduction_info_eq, free);
@@ -1926,15 +1929,16 @@ parallelize_loops (void)
       if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
+	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;
-
+      estimated = estimated_loop_iterations_int (loop, false);
       /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
       if (!flag_loop_parallelize_all
-	  && ((estimated_loop_iterations_int (loop, false)
-	       <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
+	  && ((estimated !=-1
+	       && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;
@@ -1951,11 +1955,14 @@ parallelize_loops (void)
       changed = true;
       if (dump_file && (dump_flags & TDF_DETAILS))
	{
-	  fprintf (dump_file, "parallelizing ");
	  if (loop->inner)
-	    fprintf (dump_file, "outer loop\n");
+	    fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
	  else
-	    fprintf (dump_file, "inner loop\n");
+	    fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
+	  loop_loc = find_loop_location (loop);
+	  if (loop_loc != UNKNOWN_LOC)
+	    fprintf (dump_file, "\nloop at %s:%d: ",
+		     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
	}
       gen_parallel_loop (loop, reduction_list, n_threads, &niter_desc);