diff options
author | Jan Hubicka <jh@suse.cz> | 2012-04-11 19:47:01 +0200 |
---|---|---|
committer | Jan Hubicka <hubicka@gcc.gnu.org> | 2012-04-11 17:47:01 +0000 |
commit | a66dc2852c9ca8359c1649c86c0182b66be8ee92 (patch) | |
tree | b09f68a90379e756d6e4c89adad4e7a2412d93bb /gcc/lto | |
parent | c634f4ba6f4178ad317f03a61179ba7596b9c737 (diff) | |
download | gcc-a66dc2852c9ca8359c1649c86c0182b66be8ee92.zip gcc-a66dc2852c9ca8359c1649c86c0182b66be8ee92.tar.gz gcc-a66dc2852c9ca8359c1649c86c0182b66be8ee92.tar.bz2 |
lto.c: Update copyright...
* lto.c: Update copyright; remove params.h, ipa-inline.h
and ipa-utils.h inlines; inline lto-partition.h
(ltrans_partition_def, ltrans_partition, add_cgraph_node_to_partition,
add_varpool_node_to_partition, new_partition, free_ltrans_partitions,
add_references_to_partition, add_cgraph_node_to_partition_1,
add_cgraph_node_to_partition, add_varpool_node_to_partition,
undo_partition, partition_cgraph_node_p, partition_varpool_node_p,
lto_1_to_1_map, node_cmp, varpool_node_cmp, lto_balanced_map,
promote_var, promote_fn, lto_promote_cross_file_statics): move to...
* lto-partition.c: ... here; new file.
* lto-partition.h: New file.
* Make-lang.in (lto.o): Update dependencies.
(lto-partition.o): New.
From-SVN: r186343
Diffstat (limited to 'gcc/lto')
-rw-r--r-- | gcc/lto/ChangeLog | 16 | ||||
-rw-r--r-- | gcc/lto/Make-lang.in | 11 | ||||
-rw-r--r-- | gcc/lto/lto-partition.c | 911 | ||||
-rw-r--r-- | gcc/lto/lto-partition.h | 40 | ||||
-rw-r--r-- | gcc/lto/lto.c | 899 |
5 files changed, 977 insertions, 900 deletions
diff --git a/gcc/lto/ChangeLog b/gcc/lto/ChangeLog index ef3bbfb6..fe4a9c4 100644 --- a/gcc/lto/ChangeLog +++ b/gcc/lto/ChangeLog @@ -1,3 +1,19 @@ +2012-04-11 Jan Hubicka <jh@suse.cz> + + * lto.c: Update copyright; remove params.h, ipa-inline.h + and ipa-utils.h inlines; inline lto-partition.h + (ltrans_partition_def, ltrans_partition, add_cgraph_node_to_partition, + add_varpool_node_to_partition, new_partition, free_ltrans_partitions, + add_references_to_partition, add_cgraph_node_to_partition_1, + add_cgraph_node_to_partition, add_varpool_node_to_partition, + undo_partition, partition_cgraph_node_p, partition_varpool_node_p, + lto_1_to_1_map, node_cmp, varpool_node_cmp, lto_balanced_map, + promote_var, promote_fn, lto_promote_cross_file_statics): move to... + * lto-partition.c: ... here; new file. + * lto-partition.h: New file. + * Make-lang.in (lto.o): Update dependencies. + (lto-partition.o): New. + 2012-04-05 Richard Guenther <rguenther@suse.de> * lto-lang.c (LANG_HOOKS_CALLGRAPH_EXPAND_FUNCTION): Remove diff --git a/gcc/lto/Make-lang.in b/gcc/lto/Make-lang.in index c48c8d2..b023109 100644 --- a/gcc/lto/Make-lang.in +++ b/gcc/lto/Make-lang.in @@ -23,7 +23,7 @@ # The name of the LTO compiler. LTO_EXE = lto1$(exeext) # The LTO-specific object files inclued in $(LTO_EXE). -LTO_OBJS = lto/lto-lang.o lto/lto.o lto/lto-object.o attribs.o +LTO_OBJS = lto/lto-lang.o lto/lto.o lto/lto-object.o attribs.o lto/lto-partition.o LTO_H = lto/lto.h $(HASHTAB_H) LINKER_PLUGIN_API_H = $(srcdir)/../include/plugin-api.h LTO_TREE_H = lto/lto-tree.h $(LINKER_PLUGIN_API_H) @@ -85,8 +85,13 @@ lto/lto.o: lto/lto.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(OPTS_H) \ $(CGRAPH_H) $(GGC_H) tree-ssa-operands.h $(TREE_PASS_H) \ langhooks.h $(VEC_H) $(BITMAP_H) pointer-set.h $(IPA_PROP_H) \ $(COMMON_H) debug.h $(TIMEVAR_H) $(GIMPLE_H) $(LTO_H) $(LTO_TREE_H) \ - $(LTO_TAGS_H) $(LTO_STREAMER_H) $(SPLAY_TREE_H) gt-lto-lto.h $(PARAMS_H) \ - ipa-inline.h $(IPA_UTILS_H) $(TREE_STREAMER_H) + $(LTO_TAGS_H) $(LTO_STREAMER_H) $(SPLAY_TREE_H) gt-lto-lto.h \ + $(TREE_STREAMER_H) lto/lto-partition.h +lto/lto-partition.o: lto/lto-partition.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ + toplev.h $(TREE_H) $(TM_H) \ + $(CGRAPH_H) $(TIMEVAR_H) \ + $(LTO_STREAMER_H) $(SPLAY_TREE_H) gt-lto-lto.h $(PARAMS_H) \ + ipa-inline.h $(IPA_UTILS_H) lto/lto-partition.h lto/lto-object.o: lto/lto-object.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(DIAGNOSTIC_CORE_H) $(LTO_H) $(TM_H) $(LTO_STREAMER_H) \ ../include/simple-object.h diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c new file mode 100644 index 0000000..126c469 --- /dev/null +++ b/gcc/lto/lto-partition.c @@ -0,0 +1,911 @@ +/* LTO partitioning logic routines. + Copyright 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "toplev.h" +#include "tree.h" +#include "tm.h" +#include "cgraph.h" +#include "lto-streamer.h" +#include "timevar.h" +#include "params.h" +#include "ipa-inline.h" +#include "ipa-utils.h" +#include "lto-partition.h" + +VEC(ltrans_partition, heap) *ltrans_partitions; + +static void add_cgraph_node_to_partition (ltrans_partition part, struct cgraph_node *node); +static void add_varpool_node_to_partition (ltrans_partition part, struct varpool_node *vnode); + +/* Create new partition with name NAME. */ +static ltrans_partition +new_partition (const char *name) +{ + ltrans_partition part = XCNEW (struct ltrans_partition_def); + part->cgraph_set = cgraph_node_set_new (); + part->varpool_set = varpool_node_set_new (); + part->name = name; + part->insns = 0; + VEC_safe_push (ltrans_partition, heap, ltrans_partitions, part); + return part; +} + +/* Free memory used by ltrans datastructures. */ +void +free_ltrans_partitions (void) +{ + unsigned int idx; + ltrans_partition part; + for (idx = 0; VEC_iterate (ltrans_partition, ltrans_partitions, idx, part); idx++) + { + free_cgraph_node_set (part->cgraph_set); + free (part); + } + VEC_free (ltrans_partition, heap, ltrans_partitions); +} + +/* See all references that go to comdat objects and bring them into partition too. + Also see all aliases of the newly added entry and bring them, too. */ +static void +add_references_to_partition (ltrans_partition part, struct ipa_ref_list *refs) +{ + int i; + struct ipa_ref *ref; + for (i = 0; ipa_ref_list_reference_iterate (refs, i, ref); i++) + { + if (ref->refered_type == IPA_REF_CGRAPH + && (DECL_COMDAT (cgraph_function_node (ipa_ref_node (ref), + NULL)->decl) + || (ref->use == IPA_REF_ALIAS + && lookup_attribute + ("weakref", DECL_ATTRIBUTES (ipa_ref_node (ref)->decl)))) + && !cgraph_node_in_set_p (ipa_ref_node (ref), part->cgraph_set)) + add_cgraph_node_to_partition (part, ipa_ref_node (ref)); + else + if (ref->refered_type == IPA_REF_VARPOOL + && (DECL_COMDAT (ipa_ref_varpool_node (ref)->decl) + || (ref->use == IPA_REF_ALIAS + && lookup_attribute + ("weakref", + DECL_ATTRIBUTES (ipa_ref_varpool_node (ref)->decl)))) + && !varpool_node_in_set_p (ipa_ref_varpool_node (ref), + part->varpool_set)) + add_varpool_node_to_partition (part, ipa_ref_varpool_node (ref)); + } + for (i = 0; ipa_ref_list_refering_iterate (refs, i, ref); i++) + { + if (ref->refering_type == IPA_REF_CGRAPH + && ref->use == IPA_REF_ALIAS + && !cgraph_node_in_set_p (ipa_ref_refering_node (ref), + part->cgraph_set) + && !lookup_attribute ("weakref", + DECL_ATTRIBUTES + (ipa_ref_refering_node (ref)->decl))) + add_cgraph_node_to_partition (part, ipa_ref_refering_node (ref)); + else + if (ref->refering_type == IPA_REF_VARPOOL + && ref->use == IPA_REF_ALIAS + && !varpool_node_in_set_p (ipa_ref_refering_varpool_node (ref), + part->varpool_set) + && !lookup_attribute ("weakref", + DECL_ATTRIBUTES + (ipa_ref_refering_varpool_node (ref)->decl))) + add_varpool_node_to_partition (part, + ipa_ref_refering_varpool_node (ref)); + } +} + +/* Worker for add_cgraph_node_to_partition. */ + +static bool +add_cgraph_node_to_partition_1 (struct cgraph_node *node, void *data) +{ + ltrans_partition part = (ltrans_partition) data; + + /* non-COMDAT aliases of COMDAT functions needs to be output just once. */ + if (!DECL_COMDAT (node->decl) + && !node->global.inlined_to + && node->aux) + { + gcc_assert (node->thunk.thunk_p || node->alias); + return false; + } + + if (node->aux) + { + node->in_other_partition = 1; + if (cgraph_dump_file) + fprintf (cgraph_dump_file, "Node %s/%i now used in multiple partitions\n", + cgraph_node_name (node), node->uid); + } + node->aux = (void *)((size_t)node->aux + 1); + cgraph_node_set_add (part->cgraph_set, node); + return false; +} + +/* Add NODE to partition as well as the inline callees and referred comdats into partition PART. */ + +static void +add_cgraph_node_to_partition (ltrans_partition part, struct cgraph_node *node) +{ + struct cgraph_edge *e; + cgraph_node_set_iterator csi; + struct cgraph_node *n; + + /* If NODE is already there, we have nothing to do. */ + csi = cgraph_node_set_find (part->cgraph_set, node); + if (!csi_end_p (csi)) + return; + + cgraph_for_node_thunks_and_aliases (node, add_cgraph_node_to_partition_1, part, true); + + part->insns += inline_summary (node)->self_size; + + + cgraph_node_set_add (part->cgraph_set, node); + + for (e = node->callees; e; e = e->next_callee) + if ((!e->inline_failed + || DECL_COMDAT (cgraph_function_node (e->callee, NULL)->decl)) + && !cgraph_node_in_set_p (e->callee, part->cgraph_set)) + add_cgraph_node_to_partition (part, e->callee); + + /* The only way to assemble non-weakref alias is to add the aliased object into + the unit. */ + add_references_to_partition (part, &node->ref_list); + n = cgraph_function_node (node, NULL); + if (n != node + && !lookup_attribute ("weakref", + DECL_ATTRIBUTES (node->decl))) + add_cgraph_node_to_partition (part, n); + + if (node->same_comdat_group) + for (n = node->same_comdat_group; n != node; n = n->same_comdat_group) + add_cgraph_node_to_partition (part, n); +} + +/* Add VNODE to partition as well as comdat references partition PART. */ + +static void +add_varpool_node_to_partition (ltrans_partition part, struct varpool_node *vnode) +{ + varpool_node_set_iterator vsi; + struct varpool_node *v; + + /* If NODE is already there, we have nothing to do. */ + vsi = varpool_node_set_find (part->varpool_set, vnode); + if (!vsi_end_p (vsi)) + return; + + varpool_node_set_add (part->varpool_set, vnode); + + if (vnode->aux) + { + vnode->in_other_partition = 1; + if (cgraph_dump_file) + fprintf (cgraph_dump_file, "Varpool node %s now used in multiple partitions\n", + varpool_node_name (vnode)); + } + vnode->aux = (void *)((size_t)vnode->aux + 1); + + /* The only way to assemble non-weakref alias is to add the aliased object into + the unit. */ + v = varpool_variable_node (vnode, NULL); + if (v != vnode + && !lookup_attribute ("weakref", + DECL_ATTRIBUTES (vnode->decl))) + add_varpool_node_to_partition (part, v); + + add_references_to_partition (part, &vnode->ref_list); + + if (vnode->same_comdat_group + && !varpool_node_in_set_p (vnode->same_comdat_group, part->varpool_set)) + add_varpool_node_to_partition (part, vnode->same_comdat_group); +} + +/* Undo all additions until number of cgraph nodes in PARITION is N_CGRAPH_NODES + and number of varpool nodes is N_VARPOOL_NODES. */ + +static void +undo_partition (ltrans_partition partition, unsigned int n_cgraph_nodes, + unsigned int n_varpool_nodes) +{ + while (VEC_length (cgraph_node_ptr, partition->cgraph_set->nodes) > + n_cgraph_nodes) + { + struct cgraph_node *node = VEC_index (cgraph_node_ptr, + partition->cgraph_set->nodes, + n_cgraph_nodes); + partition->insns -= inline_summary (node)->self_size; + cgraph_node_set_remove (partition->cgraph_set, node); + node->aux = (void *)((size_t)node->aux - 1); + } + while (VEC_length (varpool_node_ptr, partition->varpool_set->nodes) > + n_varpool_nodes) + { + struct varpool_node *node = VEC_index (varpool_node_ptr, + partition->varpool_set->nodes, + n_varpool_nodes); + varpool_node_set_remove (partition->varpool_set, node); + node->aux = (void *)((size_t)node->aux - 1); + } +} + +/* Return true if NODE should be partitioned. + This means that partitioning algorithm should put NODE into one of partitions. + This apply to most functions with bodies. Functions that are not partitions + are put into every unit needing them. This is the case of i.e. COMDATs. */ + +static bool +partition_cgraph_node_p (struct cgraph_node *node) +{ + /* We will get proper partition based on function they are inlined to. */ + if (node->global.inlined_to) + return false; + /* Nodes without a body do not need partitioning. */ + if (!node->analyzed) + return false; + /* Extern inlines and comdat are always only in partitions they are needed. */ + if (DECL_EXTERNAL (node->decl) + || (DECL_COMDAT (node->decl) + && !cgraph_used_from_object_file_p (node))) + return false; + if (lookup_attribute ("weakref", DECL_ATTRIBUTES (node->decl))) + return false; + return true; +} + +/* Return true if VNODE should be partitioned. + This means that partitioning algorithm should put VNODE into one of partitions. */ + +static bool +partition_varpool_node_p (struct varpool_node *vnode) +{ + if (vnode->alias || !vnode->needed) + return false; + /* Constant pool and comdat are always only in partitions they are needed. */ + if (DECL_IN_CONSTANT_POOL (vnode->decl) + || (DECL_COMDAT (vnode->decl) + && !vnode->force_output + && !varpool_used_from_object_file_p (vnode))) + return false; + if (lookup_attribute ("weakref", DECL_ATTRIBUTES (vnode->decl))) + return false; + return true; +} + +/* Group cgrah nodes by input files. This is used mainly for testing + right now. */ + +void +lto_1_to_1_map (void) +{ + struct cgraph_node *node; + struct varpool_node *vnode; + struct lto_file_decl_data *file_data; + struct pointer_map_t *pmap; + ltrans_partition partition; + void **slot; + int npartitions = 0; + + timevar_push (TV_WHOPR_WPA); + + pmap = pointer_map_create (); + + for (node = cgraph_nodes; node; node = node->next) + { + if (!partition_cgraph_node_p (node) + || node->aux) + continue; + + file_data = node->local.lto_file_data; + + if (file_data) + { + slot = pointer_map_contains (pmap, file_data); + if (slot) + partition = (ltrans_partition) *slot; + else + { + partition = new_partition (file_data->file_name); + slot = pointer_map_insert (pmap, file_data); + *slot = partition; + npartitions++; + } + } + else if (!file_data + && VEC_length (ltrans_partition, ltrans_partitions)) + partition = VEC_index (ltrans_partition, ltrans_partitions, 0); + else + { + partition = new_partition (""); + slot = pointer_map_insert (pmap, NULL); + *slot = partition; + npartitions++; + } + + add_cgraph_node_to_partition (partition, node); + } + + for (vnode = varpool_nodes; vnode; vnode = vnode->next) + { + if (!partition_varpool_node_p (vnode) + || vnode->aux) + continue; + file_data = vnode->lto_file_data; + slot = pointer_map_contains (pmap, file_data); + if (slot) + partition = (ltrans_partition) *slot; + else + { + partition = new_partition (file_data->file_name); + slot = pointer_map_insert (pmap, file_data); + *slot = partition; + npartitions++; + } + + add_varpool_node_to_partition (partition, vnode); + } + for (node = cgraph_nodes; node; node = node->next) + node->aux = NULL; + for (vnode = varpool_nodes; vnode; vnode = vnode->next) + vnode->aux = NULL; + + /* If the cgraph is empty, create one cgraph node set so that there is still + an output file for any variables that need to be exported in a DSO. */ + if (!npartitions) + new_partition ("empty"); + + pointer_map_destroy (pmap); + + timevar_pop (TV_WHOPR_WPA); + + lto_stats.num_cgraph_partitions += VEC_length (ltrans_partition, + ltrans_partitions); +} + +/* Helper function for qsort; sort nodes by order. */ +static int +node_cmp (const void *pa, const void *pb) +{ + const struct cgraph_node *a = *(const struct cgraph_node * const *) pa; + const struct cgraph_node *b = *(const struct cgraph_node * const *) pb; + return b->order - a->order; +} + +/* Helper function for qsort; sort nodes by order. */ +static int +varpool_node_cmp (const void *pa, const void *pb) +{ + const struct varpool_node *a = *(const struct varpool_node * const *) pa; + const struct varpool_node *b = *(const struct varpool_node * const *) pb; + return b->order - a->order; +} + +/* Group cgraph nodes into equally-sized partitions. + + The partitioning algorithm is simple: nodes are taken in predefined order. + The order corresponds to the order we want functions to have in the final + output. In the future this will be given by function reordering pass, but + at the moment we use the topological order, which is a good approximation. + + The goal is to partition this linear order into intervals (partitions) so + that all the partitions have approximately the same size and the number of + callgraph or IPA reference edges crossing boundaries is minimal. + + This is a lot faster (O(n) in size of callgraph) than algorithms doing + priority-based graph clustering that are generally O(n^2) and, since + WHOPR is designed to make things go well across partitions, it leads + to good results. + + We compute the expected size of a partition as: + + max (total_size / lto_partitions, min_partition_size) + + We use dynamic expected size of partition so small programs are partitioned + into enough partitions to allow use of multiple CPUs, while large programs + are not partitioned too much. Creating too many partitions significantly + increases the streaming overhead. + + In the future, we would like to bound the maximal size of partitions so as + to prevent the LTRANS stage from consuming too much memory. At the moment, + however, the WPA stage is the most memory intensive for large benchmarks, + since too many types and declarations are read into memory. + + The function implements a simple greedy algorithm. Nodes are being added + to the current partition until after 3/4 of the expected partition size is + reached. Past this threshold, we keep track of boundary size (number of + edges going to other partitions) and continue adding functions until after + the current partition has grown to twice the expected partition size. Then + the process is undone to the point where the minimal ratio of boundary size + and in-partition calls was reached. */ + +void +lto_balanced_map (void) +{ + int n_nodes = 0; + int n_varpool_nodes = 0, varpool_pos = 0; + struct cgraph_node **postorder = + XCNEWVEC (struct cgraph_node *, cgraph_n_nodes); + struct cgraph_node **order = XNEWVEC (struct cgraph_node *, cgraph_max_uid); + struct varpool_node **varpool_order = NULL; + int i, postorder_len; + struct cgraph_node *node; + int total_size = 0, best_total_size = 0; + int partition_size; + ltrans_partition partition; + unsigned int last_visited_cgraph_node = 0, last_visited_varpool_node = 0; + struct varpool_node *vnode; + int cost = 0, internal = 0; + int best_n_nodes = 0, best_n_varpool_nodes = 0, best_i = 0, best_cost = + INT_MAX, best_internal = 0; + int npartitions; + int current_order = -1; + + for (vnode = varpool_nodes; vnode; vnode = vnode->next) + gcc_assert (!vnode->aux); + /* Until we have better ordering facility, use toplogical order. + Include only nodes we will partition and compute estimate of program + size. Note that since nodes that are not partitioned might be put into + multiple partitions, this is just an estimate of real size. This is why + we keep partition_size updated after every partition is finalized. */ + postorder_len = ipa_reverse_postorder (postorder); + + for (i = 0; i < postorder_len; i++) + { + node = postorder[i]; + if (partition_cgraph_node_p (node)) + { + order[n_nodes++] = node; + total_size += inline_summary (node)->size; + } + } + free (postorder); + + if (!flag_toplevel_reorder) + { + qsort (order, n_nodes, sizeof (struct cgraph_node *), node_cmp); + + for (vnode = varpool_nodes; vnode; vnode = vnode->next) + if (partition_varpool_node_p (vnode)) + n_varpool_nodes++; + varpool_order = XNEWVEC (struct varpool_node *, n_varpool_nodes); + + n_varpool_nodes = 0; + for (vnode = varpool_nodes; vnode; vnode = vnode->next) + if (partition_varpool_node_p (vnode)) + varpool_order[n_varpool_nodes++] = vnode; + qsort (varpool_order, n_varpool_nodes, sizeof (struct varpool_node *), + varpool_node_cmp); + } + + /* Compute partition size and create the first partition. */ + partition_size = total_size / PARAM_VALUE (PARAM_LTO_PARTITIONS); + if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE)) + partition_size = PARAM_VALUE (MIN_PARTITION_SIZE); + npartitions = 1; + partition = new_partition (""); + if (cgraph_dump_file) + fprintf (cgraph_dump_file, "Total unit size: %i, partition size: %i\n", + total_size, partition_size); + + for (i = 0; i < n_nodes; i++) + { + if (order[i]->aux) + continue; + + current_order = order[i]->order; + + if (!flag_toplevel_reorder) + while (varpool_pos < n_varpool_nodes && varpool_order[varpool_pos]->order < current_order) + { + if (!varpool_order[varpool_pos]->aux) + add_varpool_node_to_partition (partition, varpool_order[varpool_pos]); + varpool_pos++; + } + + add_cgraph_node_to_partition (partition, order[i]); + total_size -= inline_summary (order[i])->size; + + + /* Once we added a new node to the partition, we also want to add + all referenced variables unless they was already added into some + earlier partition. + add_cgraph_node_to_partition adds possibly multiple nodes and + variables that are needed to satisfy needs of ORDER[i]. + We remember last visited cgraph and varpool node from last iteration + of outer loop that allows us to process every new addition. + + At the same time we compute size of the boundary into COST. Every + callgraph or IPA reference edge leaving the partition contributes into + COST. Every edge inside partition was earlier computed as one leaving + it and thus we need to subtract it from COST. */ + while (last_visited_cgraph_node < + VEC_length (cgraph_node_ptr, partition->cgraph_set->nodes) + || last_visited_varpool_node < VEC_length (varpool_node_ptr, + partition->varpool_set-> + nodes)) + { + struct ipa_ref_list *refs; + int j; + struct ipa_ref *ref; + bool cgraph_p = false; + + if (last_visited_cgraph_node < + VEC_length (cgraph_node_ptr, partition->cgraph_set->nodes)) + { + struct cgraph_edge *edge; + + cgraph_p = true; + node = VEC_index (cgraph_node_ptr, partition->cgraph_set->nodes, + last_visited_cgraph_node); + refs = &node->ref_list; + + last_visited_cgraph_node++; + + gcc_assert (node->analyzed); + + /* Compute boundary cost of callgraph edges. */ + for (edge = node->callees; edge; edge = edge->next_callee) + if (edge->callee->analyzed) + { + int edge_cost = edge->frequency; + cgraph_node_set_iterator csi; + + if (!edge_cost) + edge_cost = 1; + gcc_assert (edge_cost > 0); + csi = cgraph_node_set_find (partition->cgraph_set, edge->callee); + if (!csi_end_p (csi) + && csi.index < last_visited_cgraph_node - 1) + cost -= edge_cost, internal+= edge_cost; + else + cost += edge_cost; + } + for (edge = node->callers; edge; edge = edge->next_caller) + { + int edge_cost = edge->frequency; + cgraph_node_set_iterator csi; + + gcc_assert (edge->caller->analyzed); + if (!edge_cost) + edge_cost = 1; + gcc_assert (edge_cost > 0); + csi = cgraph_node_set_find (partition->cgraph_set, edge->caller); + if (!csi_end_p (csi) + && csi.index < last_visited_cgraph_node) + cost -= edge_cost; + else + cost += edge_cost; + } + } + else + { + refs = + &VEC_index (varpool_node_ptr, partition->varpool_set->nodes, + last_visited_varpool_node)->ref_list; + last_visited_varpool_node++; + } + + /* Compute boundary cost of IPA REF edges and at the same time look into + variables referenced from current partition and try to add them. */ + for (j = 0; ipa_ref_list_reference_iterate (refs, j, ref); j++) + if (ref->refered_type == IPA_REF_VARPOOL) + { + varpool_node_set_iterator vsi; + + vnode = ipa_ref_varpool_node (ref); + if (!vnode->finalized) + continue; + if (!vnode->aux && flag_toplevel_reorder + && partition_varpool_node_p (vnode)) + add_varpool_node_to_partition (partition, vnode); + vsi = varpool_node_set_find (partition->varpool_set, vnode); + if (!vsi_end_p (vsi) + && vsi.index < last_visited_varpool_node - !cgraph_p) + cost--, internal++; + else + cost++; + } + else + { + cgraph_node_set_iterator csi; + + node = ipa_ref_node (ref); + if (!node->analyzed) + continue; + csi = cgraph_node_set_find (partition->cgraph_set, node); + if (!csi_end_p (csi) + && csi.index < last_visited_cgraph_node - cgraph_p) + cost--, internal++; + else + cost++; + } + for (j = 0; ipa_ref_list_refering_iterate (refs, j, ref); j++) + if (ref->refering_type == IPA_REF_VARPOOL) + { + varpool_node_set_iterator vsi; + + vnode = ipa_ref_refering_varpool_node (ref); + gcc_assert (vnode->finalized); + if (!vnode->aux && flag_toplevel_reorder + && partition_varpool_node_p (vnode)) + add_varpool_node_to_partition (partition, vnode); + vsi = varpool_node_set_find (partition->varpool_set, vnode); + if (!vsi_end_p (vsi) + && vsi.index < last_visited_varpool_node) + cost--; + else + cost++; + } + else + { + cgraph_node_set_iterator csi; + + node = ipa_ref_refering_node (ref); + gcc_assert (node->analyzed); + csi = cgraph_node_set_find (partition->cgraph_set, node); + if (!csi_end_p (csi) + && csi.index < last_visited_cgraph_node) + cost--; + else + cost++; + } + } + + /* If the partition is large enough, start looking for smallest boundary cost. */ + if (partition->insns < partition_size * 3 / 4 + || best_cost == INT_MAX + || ((!cost + || (best_internal * (HOST_WIDE_INT) cost + > (internal * (HOST_WIDE_INT)best_cost))) + && partition->insns < partition_size * 5 / 4)) + { + best_cost = cost; + best_internal = internal; + best_i = i; + best_n_nodes = VEC_length (cgraph_node_ptr, + partition->cgraph_set->nodes); + best_n_varpool_nodes = VEC_length (varpool_node_ptr, + partition->varpool_set->nodes); + best_total_size = total_size; + } + if (cgraph_dump_file) + fprintf (cgraph_dump_file, "Step %i: added %s/%i, size %i, cost %i/%i best %i/%i, step %i\n", i, + cgraph_node_name (order[i]), order[i]->uid, partition->insns, cost, internal, + best_cost, best_internal, best_i); + /* Partition is too large, unwind into step when best cost was reached and + start new partition. */ + if (partition->insns > 2 * partition_size) + { + if (best_i != i) + { + if (cgraph_dump_file) + fprintf (cgraph_dump_file, "Unwinding %i insertions to step %i\n", + i - best_i, best_i); + undo_partition (partition, best_n_nodes, best_n_varpool_nodes); + } + i = best_i; + /* When we are finished, avoid creating empty partition. */ + while (i < n_nodes - 1 && order[i + 1]->aux) + i++; + if (i == n_nodes - 1) + break; + partition = new_partition (""); + last_visited_cgraph_node = 0; + last_visited_varpool_node = 0; + total_size = best_total_size; + cost = 0; + + if (cgraph_dump_file) + fprintf (cgraph_dump_file, "New partition\n"); + best_n_nodes = 0; + best_n_varpool_nodes = 0; + best_cost = INT_MAX; + + /* Since the size of partitions is just approximate, update the size after + we finished current one. */ + if (npartitions < PARAM_VALUE (PARAM_LTO_PARTITIONS)) + partition_size = total_size + / (PARAM_VALUE (PARAM_LTO_PARTITIONS) - npartitions); + else + partition_size = INT_MAX; + + if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE)) + partition_size = PARAM_VALUE (MIN_PARTITION_SIZE); + npartitions ++; + } + } + + /* Varables that are not reachable from the code go into last partition. */ + if (flag_toplevel_reorder) + { + for (vnode = varpool_nodes; vnode; vnode = vnode->next) + if (partition_varpool_node_p (vnode) && !vnode->aux) + add_varpool_node_to_partition (partition, vnode); + } + else + { + while (varpool_pos < n_varpool_nodes) + { + if (!varpool_order[varpool_pos]->aux) + add_varpool_node_to_partition (partition, varpool_order[varpool_pos]); + varpool_pos++; + } + free (varpool_order); + } + free (order); +} + +/* Promote variable VNODE to be static. */ + +static bool +promote_var (struct varpool_node *vnode) +{ + if (TREE_PUBLIC (vnode->decl) || DECL_EXTERNAL (vnode->decl)) + return false; + gcc_assert (flag_wpa); + TREE_PUBLIC (vnode->decl) = 1; + DECL_VISIBILITY (vnode->decl) = VISIBILITY_HIDDEN; + DECL_VISIBILITY_SPECIFIED (vnode->decl) = true; + if (cgraph_dump_file) + fprintf (cgraph_dump_file, + "Promoting var as hidden: %s\n", varpool_node_name (vnode)); + return true; +} + +/* Promote function NODE to be static. */ + +static bool +promote_fn (struct cgraph_node *node) +{ + gcc_assert (flag_wpa); + if (TREE_PUBLIC (node->decl) || DECL_EXTERNAL (node->decl)) + return false; + TREE_PUBLIC (node->decl) = 1; + DECL_VISIBILITY (node->decl) = VISIBILITY_HIDDEN; + DECL_VISIBILITY_SPECIFIED (node->decl) = true; + if (cgraph_dump_file) + fprintf (cgraph_dump_file, + "Promoting function as hidden: %s/%i\n", + cgraph_node_name (node), node->uid); + return true; +} + +/* Find out all static decls that need to be promoted to global because + of cross file sharing. This function must be run in the WPA mode after + all inlinees are added. */ + +void +lto_promote_cross_file_statics (void) +{ + struct varpool_node *vnode; + unsigned i, n_sets; + cgraph_node_set set; + varpool_node_set vset; + cgraph_node_set_iterator csi; + varpool_node_set_iterator vsi; + VEC(varpool_node_ptr, heap) *promoted_initializers = NULL; + struct pointer_set_t *inserted = pointer_set_create (); + + gcc_assert (flag_wpa); + + n_sets = VEC_length (ltrans_partition, ltrans_partitions); + for (i = 0; i < n_sets; i++) + { + ltrans_partition part + = VEC_index (ltrans_partition, ltrans_partitions, i); + set = part->cgraph_set; + vset = part->varpool_set; + + /* If node called or referred to from other partition, it needs to be + globalized. */ + for (csi = csi_start (set); !csi_end_p (csi); csi_next (&csi)) + { + struct cgraph_node *node = csi_node (csi); + if (node->local.externally_visible) + continue; + if (node->global.inlined_to) + continue; + if ((!DECL_EXTERNAL (node->decl) && !DECL_COMDAT (node->decl)) + && (referenced_from_other_partition_p (&node->ref_list, set, vset) + || reachable_from_other_partition_p (node, set))) + promote_fn (node); + } + for (vsi = vsi_start (vset); !vsi_end_p (vsi); vsi_next (&vsi)) + { + vnode = vsi_node (vsi); + /* Constant pool references use internal labels and thus can not + be made global. It is sensible to keep those ltrans local to + allow better optimization. */ + if (!DECL_IN_CONSTANT_POOL (vnode->decl) && !DECL_COMDAT (vnode->decl) + && !vnode->externally_visible && vnode->analyzed + && referenced_from_other_partition_p (&vnode->ref_list, + set, vset)) + promote_var (vnode); + } + + /* We export the initializer of a read-only var into each partition + referencing the var. Folding might take declarations from the + initializer and use them, so everything referenced from the + initializer can be accessed from this partition after folding. + + This means that we need to promote all variables and functions + referenced from all initializers of read-only vars referenced + from this partition that are not in this partition. This needs + to be done recursively. */ + for (vnode = varpool_nodes; vnode; vnode = vnode->next) + if (const_value_known_p (vnode->decl) + && DECL_INITIAL (vnode->decl) + && !varpool_node_in_set_p (vnode, vset) + && referenced_from_this_partition_p (&vnode->ref_list, set, vset) + && !pointer_set_insert (inserted, vnode)) + VEC_safe_push (varpool_node_ptr, heap, promoted_initializers, vnode); + + while (!VEC_empty (varpool_node_ptr, promoted_initializers)) + { + int i; + struct ipa_ref *ref; + + vnode = VEC_pop (varpool_node_ptr, promoted_initializers); + for (i = 0; + ipa_ref_list_reference_iterate (&vnode->ref_list, i, ref); + i++) + { + if (ref->refered_type == IPA_REF_CGRAPH) + { + struct cgraph_node *n = ipa_ref_node (ref); + gcc_assert (!n->global.inlined_to); + if (!n->local.externally_visible + && !cgraph_node_in_set_p (n, set)) + promote_fn (n); + } + else + { + struct varpool_node *v = ipa_ref_varpool_node (ref); + if (varpool_node_in_set_p (v, vset)) + continue; + + /* Constant pool references use internal labels and thus + cannot be made global. It is sensible to keep those + ltrans local to allow better optimization. */ + if (DECL_IN_CONSTANT_POOL (v->decl)) + { + if (!pointer_set_insert (inserted, vnode)) + VEC_safe_push (varpool_node_ptr, heap, + promoted_initializers, v); + } + else if (!v->externally_visible && v->analyzed) + { + if (promote_var (v) + && DECL_INITIAL (v->decl) + && const_value_known_p (v->decl) + && !pointer_set_insert (inserted, vnode)) + VEC_safe_push (varpool_node_ptr, heap, + promoted_initializers, v); + } + } + } + } + } + pointer_set_destroy (inserted); +} diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h new file mode 100644 index 0000000..2160274 --- /dev/null +++ b/gcc/lto/lto-partition.h @@ -0,0 +1,40 @@ +/* LTO partitioning logic routines. + Copyright 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + + +/* Structure describing ltrans partitions. */ + +struct ltrans_partition_def +{ + cgraph_node_set cgraph_set; + varpool_node_set varpool_set; + const char * name; + int insns; +}; + +typedef struct ltrans_partition_def *ltrans_partition; +DEF_VEC_P(ltrans_partition); +DEF_VEC_ALLOC_P(ltrans_partition,heap); + +extern VEC(ltrans_partition, heap) *ltrans_partitions; + +void lto_1_to_1_map (void); +void lto_balanced_map (void); +void lto_promote_cross_file_statics (void); +void free_ltrans_partitions (void); diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c index 26b4065..921533f 100644 --- a/gcc/lto/lto.c +++ b/gcc/lto/lto.c @@ -1,5 +1,5 @@ /* Top-level LTO routines. - Copyright 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright 2009, 2010, 2011, 2012 Free Software Foundation, Inc. Contributed by CodeSourcery, Inc. This file is part of GCC. @@ -45,9 +45,7 @@ along with GCC; see the file COPYING3. If not see #include "lto-streamer.h" #include "tree-streamer.h" #include "splay-tree.h" -#include "params.h" -#include "ipa-inline.h" -#include "ipa-utils.h" +#include "lto-partition.h" static GTY(()) tree first_personality_decl; @@ -1398,899 +1396,6 @@ free_section_data (struct lto_file_decl_data *file_data ATTRIBUTE_UNUSED, #endif } -/* Structure describing ltrans partitions. */ - -struct ltrans_partition_def -{ - cgraph_node_set cgraph_set; - varpool_node_set varpool_set; - const char * name; - int insns; -}; - -typedef struct ltrans_partition_def *ltrans_partition; -DEF_VEC_P(ltrans_partition); -DEF_VEC_ALLOC_P(ltrans_partition,heap); - -static VEC(ltrans_partition, heap) *ltrans_partitions; - -static void add_cgraph_node_to_partition (ltrans_partition part, struct cgraph_node *node); -static void add_varpool_node_to_partition (ltrans_partition part, struct varpool_node *vnode); - -/* Create new partition with name NAME. */ -static ltrans_partition -new_partition (const char *name) -{ - ltrans_partition part = XCNEW (struct ltrans_partition_def); - part->cgraph_set = cgraph_node_set_new (); - part->varpool_set = varpool_node_set_new (); - part->name = name; - part->insns = 0; - VEC_safe_push (ltrans_partition, heap, ltrans_partitions, part); - return part; -} - -/* Free memory used by ltrans datastructures. */ -static void -free_ltrans_partitions (void) -{ - unsigned int idx; - ltrans_partition part; - for (idx = 0; VEC_iterate (ltrans_partition, ltrans_partitions, idx, part); idx++) - { - free_cgraph_node_set (part->cgraph_set); - free (part); - } - VEC_free (ltrans_partition, heap, ltrans_partitions); -} - -/* See all references that go to comdat objects and bring them into partition too. - Also see all aliases of the newly added entry and bring them, too. */ -static void -add_references_to_partition (ltrans_partition part, struct ipa_ref_list *refs) -{ - int i; - struct ipa_ref *ref; - for (i = 0; ipa_ref_list_reference_iterate (refs, i, ref); i++) - { - if (ref->refered_type == IPA_REF_CGRAPH - && (DECL_COMDAT (cgraph_function_node (ipa_ref_node (ref), - NULL)->decl) - || (ref->use == IPA_REF_ALIAS - && lookup_attribute - ("weakref", DECL_ATTRIBUTES (ipa_ref_node (ref)->decl)))) - && !cgraph_node_in_set_p (ipa_ref_node (ref), part->cgraph_set)) - add_cgraph_node_to_partition (part, ipa_ref_node (ref)); - else - if (ref->refered_type == IPA_REF_VARPOOL - && (DECL_COMDAT (ipa_ref_varpool_node (ref)->decl) - || (ref->use == IPA_REF_ALIAS - && lookup_attribute - ("weakref", - DECL_ATTRIBUTES (ipa_ref_varpool_node (ref)->decl)))) - && !varpool_node_in_set_p (ipa_ref_varpool_node (ref), - part->varpool_set)) - add_varpool_node_to_partition (part, ipa_ref_varpool_node (ref)); - } - for (i = 0; ipa_ref_list_refering_iterate (refs, i, ref); i++) - { - if (ref->refering_type == IPA_REF_CGRAPH - && ref->use == IPA_REF_ALIAS - && !cgraph_node_in_set_p (ipa_ref_refering_node (ref), - part->cgraph_set) - && !lookup_attribute ("weakref", - DECL_ATTRIBUTES - (ipa_ref_refering_node (ref)->decl))) - add_cgraph_node_to_partition (part, ipa_ref_refering_node (ref)); - else - if (ref->refering_type == IPA_REF_VARPOOL - && ref->use == IPA_REF_ALIAS - && !varpool_node_in_set_p (ipa_ref_refering_varpool_node (ref), - part->varpool_set) - && !lookup_attribute ("weakref", - DECL_ATTRIBUTES - (ipa_ref_refering_varpool_node (ref)->decl))) - add_varpool_node_to_partition (part, - ipa_ref_refering_varpool_node (ref)); - } -} - -/* Worker for add_cgraph_node_to_partition. */ - -static bool -add_cgraph_node_to_partition_1 (struct cgraph_node *node, void *data) -{ - ltrans_partition part = (ltrans_partition) data; - - /* non-COMDAT aliases of COMDAT functions needs to be output just once. */ - if (!DECL_COMDAT (node->decl) - && !node->global.inlined_to - && node->aux) - { - gcc_assert (node->thunk.thunk_p || node->alias); - return false; - } - - if (node->aux) - { - node->in_other_partition = 1; - if (cgraph_dump_file) - fprintf (cgraph_dump_file, "Node %s/%i now used in multiple partitions\n", - cgraph_node_name (node), node->uid); - } - node->aux = (void *)((size_t)node->aux + 1); - cgraph_node_set_add (part->cgraph_set, node); - return false; -} - -/* Add NODE to partition as well as the inline callees and referred comdats into partition PART. */ - -static void -add_cgraph_node_to_partition (ltrans_partition part, struct cgraph_node *node) -{ - struct cgraph_edge *e; - cgraph_node_set_iterator csi; - struct cgraph_node *n; - - /* If NODE is already there, we have nothing to do. */ - csi = cgraph_node_set_find (part->cgraph_set, node); - if (!csi_end_p (csi)) - return; - - cgraph_for_node_thunks_and_aliases (node, add_cgraph_node_to_partition_1, part, true); - - part->insns += inline_summary (node)->self_size; - - - cgraph_node_set_add (part->cgraph_set, node); - - for (e = node->callees; e; e = e->next_callee) - if ((!e->inline_failed - || DECL_COMDAT (cgraph_function_node (e->callee, NULL)->decl)) - && !cgraph_node_in_set_p (e->callee, part->cgraph_set)) - add_cgraph_node_to_partition (part, e->callee); - - /* The only way to assemble non-weakref alias is to add the aliased object into - the unit. */ - add_references_to_partition (part, &node->ref_list); - n = cgraph_function_node (node, NULL); - if (n != node - && !lookup_attribute ("weakref", - DECL_ATTRIBUTES (node->decl))) - add_cgraph_node_to_partition (part, n); - - if (node->same_comdat_group) - for (n = node->same_comdat_group; n != node; n = n->same_comdat_group) - add_cgraph_node_to_partition (part, n); -} - -/* Add VNODE to partition as well as comdat references partition PART. */ - -static void -add_varpool_node_to_partition (ltrans_partition part, struct varpool_node *vnode) -{ - varpool_node_set_iterator vsi; - struct varpool_node *v; - - /* If NODE is already there, we have nothing to do. */ - vsi = varpool_node_set_find (part->varpool_set, vnode); - if (!vsi_end_p (vsi)) - return; - - varpool_node_set_add (part->varpool_set, vnode); - - if (vnode->aux) - { - vnode->in_other_partition = 1; - if (cgraph_dump_file) - fprintf (cgraph_dump_file, "Varpool node %s now used in multiple partitions\n", - varpool_node_name (vnode)); - } - vnode->aux = (void *)((size_t)vnode->aux + 1); - - /* The only way to assemble non-weakref alias is to add the aliased object into - the unit. */ - v = varpool_variable_node (vnode, NULL); - if (v != vnode - && !lookup_attribute ("weakref", - DECL_ATTRIBUTES (vnode->decl))) - add_varpool_node_to_partition (part, v); - - add_references_to_partition (part, &vnode->ref_list); - - if (vnode->same_comdat_group - && !varpool_node_in_set_p (vnode->same_comdat_group, part->varpool_set)) - add_varpool_node_to_partition (part, vnode->same_comdat_group); -} - -/* Undo all additions until number of cgraph nodes in PARITION is N_CGRAPH_NODES - and number of varpool nodes is N_VARPOOL_NODES. */ - -static void -undo_partition (ltrans_partition partition, unsigned int n_cgraph_nodes, - unsigned int n_varpool_nodes) -{ - while (VEC_length (cgraph_node_ptr, partition->cgraph_set->nodes) > - n_cgraph_nodes) - { - struct cgraph_node *node = VEC_index (cgraph_node_ptr, - partition->cgraph_set->nodes, - n_cgraph_nodes); - partition->insns -= inline_summary (node)->self_size; - cgraph_node_set_remove (partition->cgraph_set, node); - node->aux = (void *)((size_t)node->aux - 1); - } - while (VEC_length (varpool_node_ptr, partition->varpool_set->nodes) > - n_varpool_nodes) - { - struct varpool_node *node = VEC_index (varpool_node_ptr, - partition->varpool_set->nodes, - n_varpool_nodes); - varpool_node_set_remove (partition->varpool_set, node); - node->aux = (void *)((size_t)node->aux - 1); - } -} - -/* Return true if NODE should be partitioned. - This means that partitioning algorithm should put NODE into one of partitions. - This apply to most functions with bodies. Functions that are not partitions - are put into every unit needing them. This is the case of i.e. COMDATs. */ - -static bool -partition_cgraph_node_p (struct cgraph_node *node) -{ - /* We will get proper partition based on function they are inlined to. */ - if (node->global.inlined_to) - return false; - /* Nodes without a body do not need partitioning. */ - if (!node->analyzed) - return false; - /* Extern inlines and comdat are always only in partitions they are needed. */ - if (DECL_EXTERNAL (node->decl) - || (DECL_COMDAT (node->decl) - && !cgraph_used_from_object_file_p (node))) - return false; - if (lookup_attribute ("weakref", DECL_ATTRIBUTES (node->decl))) - return false; - return true; -} - -/* Return true if VNODE should be partitioned. - This means that partitioning algorithm should put VNODE into one of partitions. */ - -static bool -partition_varpool_node_p (struct varpool_node *vnode) -{ - if (vnode->alias || !vnode->needed) - return false; - /* Constant pool and comdat are always only in partitions they are needed. */ - if (DECL_IN_CONSTANT_POOL (vnode->decl) - || (DECL_COMDAT (vnode->decl) - && !vnode->force_output - && !varpool_used_from_object_file_p (vnode))) - return false; - if (lookup_attribute ("weakref", DECL_ATTRIBUTES (vnode->decl))) - return false; - return true; -} - -/* Group cgrah nodes by input files. This is used mainly for testing - right now. */ - -static void -lto_1_to_1_map (void) -{ - struct cgraph_node *node; - struct varpool_node *vnode; - struct lto_file_decl_data *file_data; - struct pointer_map_t *pmap; - ltrans_partition partition; - void **slot; - int npartitions = 0; - - timevar_push (TV_WHOPR_WPA); - - pmap = pointer_map_create (); - - for (node = cgraph_nodes; node; node = node->next) - { - if (!partition_cgraph_node_p (node) - || node->aux) - continue; - - file_data = node->local.lto_file_data; - - if (file_data) - { - slot = pointer_map_contains (pmap, file_data); - if (slot) - partition = (ltrans_partition) *slot; - else - { - partition = new_partition (file_data->file_name); - slot = pointer_map_insert (pmap, file_data); - *slot = partition; - npartitions++; - } - } - else if (!file_data - && VEC_length (ltrans_partition, ltrans_partitions)) - partition = VEC_index (ltrans_partition, ltrans_partitions, 0); - else - { - partition = new_partition (""); - slot = pointer_map_insert (pmap, NULL); - *slot = partition; - npartitions++; - } - - add_cgraph_node_to_partition (partition, node); - } - - for (vnode = varpool_nodes; vnode; vnode = vnode->next) - { - if (!partition_varpool_node_p (vnode) - || vnode->aux) - continue; - file_data = vnode->lto_file_data; - slot = pointer_map_contains (pmap, file_data); - if (slot) - partition = (ltrans_partition) *slot; - else - { - partition = new_partition (file_data->file_name); - slot = pointer_map_insert (pmap, file_data); - *slot = partition; - npartitions++; - } - - add_varpool_node_to_partition (partition, vnode); - } - for (node = cgraph_nodes; node; node = node->next) - node->aux = NULL; - for (vnode = varpool_nodes; vnode; vnode = vnode->next) - vnode->aux = NULL; - - /* If the cgraph is empty, create one cgraph node set so that there is still - an output file for any variables that need to be exported in a DSO. */ - if (!npartitions) - new_partition ("empty"); - - pointer_map_destroy (pmap); - - timevar_pop (TV_WHOPR_WPA); - - lto_stats.num_cgraph_partitions += VEC_length (ltrans_partition, - ltrans_partitions); -} - -/* Helper function for qsort; sort nodes by order. */ -static int -node_cmp (const void *pa, const void *pb) -{ - const struct cgraph_node *a = *(const struct cgraph_node * const *) pa; - const struct cgraph_node *b = *(const struct cgraph_node * const *) pb; - return b->order - a->order; -} - -/* Helper function for qsort; sort nodes by order. */ -static int -varpool_node_cmp (const void *pa, const void *pb) -{ - const struct varpool_node *a = *(const struct varpool_node * const *) pa; - const struct varpool_node *b = *(const struct varpool_node * const *) pb; - return b->order - a->order; -} - -/* Group cgraph nodes into equally-sized partitions. - - The partitioning algorithm is simple: nodes are taken in predefined order. - The order corresponds to the order we want functions to have in the final - output. In the future this will be given by function reordering pass, but - at the moment we use the topological order, which is a good approximation. - - The goal is to partition this linear order into intervals (partitions) so - that all the partitions have approximately the same size and the number of - callgraph or IPA reference edges crossing boundaries is minimal. - - This is a lot faster (O(n) in size of callgraph) than algorithms doing - priority-based graph clustering that are generally O(n^2) and, since - WHOPR is designed to make things go well across partitions, it leads - to good results. - - We compute the expected size of a partition as: - - max (total_size / lto_partitions, min_partition_size) - - We use dynamic expected size of partition so small programs are partitioned - into enough partitions to allow use of multiple CPUs, while large programs - are not partitioned too much. Creating too many partitions significantly - increases the streaming overhead. - - In the future, we would like to bound the maximal size of partitions so as - to prevent the LTRANS stage from consuming too much memory. At the moment, - however, the WPA stage is the most memory intensive for large benchmarks, - since too many types and declarations are read into memory. - - The function implements a simple greedy algorithm. Nodes are being added - to the current partition until after 3/4 of the expected partition size is - reached. Past this threshold, we keep track of boundary size (number of - edges going to other partitions) and continue adding functions until after - the current partition has grown to twice the expected partition size. Then - the process is undone to the point where the minimal ratio of boundary size - and in-partition calls was reached. */ - -static void -lto_balanced_map (void) -{ - int n_nodes = 0; - int n_varpool_nodes = 0, varpool_pos = 0; - struct cgraph_node **postorder = - XCNEWVEC (struct cgraph_node *, cgraph_n_nodes); - struct cgraph_node **order = XNEWVEC (struct cgraph_node *, cgraph_max_uid); - struct varpool_node **varpool_order = NULL; - int i, postorder_len; - struct cgraph_node *node; - int total_size = 0, best_total_size = 0; - int partition_size; - ltrans_partition partition; - unsigned int last_visited_cgraph_node = 0, last_visited_varpool_node = 0; - struct varpool_node *vnode; - int cost = 0, internal = 0; - int best_n_nodes = 0, best_n_varpool_nodes = 0, best_i = 0, best_cost = - INT_MAX, best_internal = 0; - int npartitions; - int current_order = -1; - - for (vnode = varpool_nodes; vnode; vnode = vnode->next) - gcc_assert (!vnode->aux); - /* Until we have better ordering facility, use toplogical order. - Include only nodes we will partition and compute estimate of program - size. Note that since nodes that are not partitioned might be put into - multiple partitions, this is just an estimate of real size. This is why - we keep partition_size updated after every partition is finalized. */ - postorder_len = ipa_reverse_postorder (postorder); - - for (i = 0; i < postorder_len; i++) - { - node = postorder[i]; - if (partition_cgraph_node_p (node)) - { - order[n_nodes++] = node; - total_size += inline_summary (node)->size; - } - } - free (postorder); - - if (!flag_toplevel_reorder) - { - qsort (order, n_nodes, sizeof (struct cgraph_node *), node_cmp); - - for (vnode = varpool_nodes; vnode; vnode = vnode->next) - if (partition_varpool_node_p (vnode)) - n_varpool_nodes++; - varpool_order = XNEWVEC (struct varpool_node *, n_varpool_nodes); - - n_varpool_nodes = 0; - for (vnode = varpool_nodes; vnode; vnode = vnode->next) - if (partition_varpool_node_p (vnode)) - varpool_order[n_varpool_nodes++] = vnode; - qsort (varpool_order, n_varpool_nodes, sizeof (struct varpool_node *), - varpool_node_cmp); - } - - /* Compute partition size and create the first partition. */ - partition_size = total_size / PARAM_VALUE (PARAM_LTO_PARTITIONS); - if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE)) - partition_size = PARAM_VALUE (MIN_PARTITION_SIZE); - npartitions = 1; - partition = new_partition (""); - if (cgraph_dump_file) - fprintf (cgraph_dump_file, "Total unit size: %i, partition size: %i\n", - total_size, partition_size); - - for (i = 0; i < n_nodes; i++) - { - if (order[i]->aux) - continue; - - current_order = order[i]->order; - - if (!flag_toplevel_reorder) - while (varpool_pos < n_varpool_nodes && varpool_order[varpool_pos]->order < current_order) - { - if (!varpool_order[varpool_pos]->aux) - add_varpool_node_to_partition (partition, varpool_order[varpool_pos]); - varpool_pos++; - } - - add_cgraph_node_to_partition (partition, order[i]); - total_size -= inline_summary (order[i])->size; - - - /* Once we added a new node to the partition, we also want to add - all referenced variables unless they was already added into some - earlier partition. - add_cgraph_node_to_partition adds possibly multiple nodes and - variables that are needed to satisfy needs of ORDER[i]. - We remember last visited cgraph and varpool node from last iteration - of outer loop that allows us to process every new addition. - - At the same time we compute size of the boundary into COST. Every - callgraph or IPA reference edge leaving the partition contributes into - COST. Every edge inside partition was earlier computed as one leaving - it and thus we need to subtract it from COST. */ - while (last_visited_cgraph_node < - VEC_length (cgraph_node_ptr, partition->cgraph_set->nodes) - || last_visited_varpool_node < VEC_length (varpool_node_ptr, - partition->varpool_set-> - nodes)) - { - struct ipa_ref_list *refs; - int j; - struct ipa_ref *ref; - bool cgraph_p = false; - - if (last_visited_cgraph_node < - VEC_length (cgraph_node_ptr, partition->cgraph_set->nodes)) - { - struct cgraph_edge *edge; - - cgraph_p = true; - node = VEC_index (cgraph_node_ptr, partition->cgraph_set->nodes, - last_visited_cgraph_node); - refs = &node->ref_list; - - last_visited_cgraph_node++; - - gcc_assert (node->analyzed); - - /* Compute boundary cost of callgraph edges. */ - for (edge = node->callees; edge; edge = edge->next_callee) - if (edge->callee->analyzed) - { - int edge_cost = edge->frequency; - cgraph_node_set_iterator csi; - - if (!edge_cost) - edge_cost = 1; - gcc_assert (edge_cost > 0); - csi = cgraph_node_set_find (partition->cgraph_set, edge->callee); - if (!csi_end_p (csi) - && csi.index < last_visited_cgraph_node - 1) - cost -= edge_cost, internal+= edge_cost; - else - cost += edge_cost; - } - for (edge = node->callers; edge; edge = edge->next_caller) - { - int edge_cost = edge->frequency; - cgraph_node_set_iterator csi; - - gcc_assert (edge->caller->analyzed); - if (!edge_cost) - edge_cost = 1; - gcc_assert (edge_cost > 0); - csi = cgraph_node_set_find (partition->cgraph_set, edge->caller); - if (!csi_end_p (csi) - && csi.index < last_visited_cgraph_node) - cost -= edge_cost; - else - cost += edge_cost; - } - } - else - { - refs = - &VEC_index (varpool_node_ptr, partition->varpool_set->nodes, - last_visited_varpool_node)->ref_list; - last_visited_varpool_node++; - } - - /* Compute boundary cost of IPA REF edges and at the same time look into - variables referenced from current partition and try to add them. */ - for (j = 0; ipa_ref_list_reference_iterate (refs, j, ref); j++) - if (ref->refered_type == IPA_REF_VARPOOL) - { - varpool_node_set_iterator vsi; - - vnode = ipa_ref_varpool_node (ref); - if (!vnode->finalized) - continue; - if (!vnode->aux && flag_toplevel_reorder - && partition_varpool_node_p (vnode)) - add_varpool_node_to_partition (partition, vnode); - vsi = varpool_node_set_find (partition->varpool_set, vnode); - if (!vsi_end_p (vsi) - && vsi.index < last_visited_varpool_node - !cgraph_p) - cost--, internal++; - else - cost++; - } - else - { - cgraph_node_set_iterator csi; - - node = ipa_ref_node (ref); - if (!node->analyzed) - continue; - csi = cgraph_node_set_find (partition->cgraph_set, node); - if (!csi_end_p (csi) - && csi.index < last_visited_cgraph_node - cgraph_p) - cost--, internal++; - else - cost++; - } - for (j = 0; ipa_ref_list_refering_iterate (refs, j, ref); j++) - if (ref->refering_type == IPA_REF_VARPOOL) - { - varpool_node_set_iterator vsi; - - vnode = ipa_ref_refering_varpool_node (ref); - gcc_assert (vnode->finalized); - if (!vnode->aux && flag_toplevel_reorder - && partition_varpool_node_p (vnode)) - add_varpool_node_to_partition (partition, vnode); - vsi = varpool_node_set_find (partition->varpool_set, vnode); - if (!vsi_end_p (vsi) - && vsi.index < last_visited_varpool_node) - cost--; - else - cost++; - } - else - { - cgraph_node_set_iterator csi; - - node = ipa_ref_refering_node (ref); - gcc_assert (node->analyzed); - csi = cgraph_node_set_find (partition->cgraph_set, node); - if (!csi_end_p (csi) - && csi.index < last_visited_cgraph_node) - cost--; - else - cost++; - } - } - - /* If the partition is large enough, start looking for smallest boundary cost. */ - if (partition->insns < partition_size * 3 / 4 - || best_cost == INT_MAX - || ((!cost - || (best_internal * (HOST_WIDE_INT) cost - > (internal * (HOST_WIDE_INT)best_cost))) - && partition->insns < partition_size * 5 / 4)) - { - best_cost = cost; - best_internal = internal; - best_i = i; - best_n_nodes = VEC_length (cgraph_node_ptr, - partition->cgraph_set->nodes); - best_n_varpool_nodes = VEC_length (varpool_node_ptr, - partition->varpool_set->nodes); - best_total_size = total_size; - } - if (cgraph_dump_file) - fprintf (cgraph_dump_file, "Step %i: added %s/%i, size %i, cost %i/%i best %i/%i, step %i\n", i, - cgraph_node_name (order[i]), order[i]->uid, partition->insns, cost, internal, - best_cost, best_internal, best_i); - /* Partition is too large, unwind into step when best cost was reached and - start new partition. */ - if (partition->insns > 2 * partition_size) - { - if (best_i != i) - { - if (cgraph_dump_file) - fprintf (cgraph_dump_file, "Unwinding %i insertions to step %i\n", - i - best_i, best_i); - undo_partition (partition, best_n_nodes, best_n_varpool_nodes); - } - i = best_i; - /* When we are finished, avoid creating empty partition. */ - while (i < n_nodes - 1 && order[i + 1]->aux) - i++; - if (i == n_nodes - 1) - break; - partition = new_partition (""); - last_visited_cgraph_node = 0; - last_visited_varpool_node = 0; - total_size = best_total_size; - cost = 0; - - if (cgraph_dump_file) - fprintf (cgraph_dump_file, "New partition\n"); - best_n_nodes = 0; - best_n_varpool_nodes = 0; - best_cost = INT_MAX; - - /* Since the size of partitions is just approximate, update the size after - we finished current one. */ - if (npartitions < PARAM_VALUE (PARAM_LTO_PARTITIONS)) - partition_size = total_size - / (PARAM_VALUE (PARAM_LTO_PARTITIONS) - npartitions); - else - partition_size = INT_MAX; - - if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE)) - partition_size = PARAM_VALUE (MIN_PARTITION_SIZE); - npartitions ++; - } - } - - /* Varables that are not reachable from the code go into last partition. */ - if (flag_toplevel_reorder) - { - for (vnode = varpool_nodes; vnode; vnode = vnode->next) - if (partition_varpool_node_p (vnode) && !vnode->aux) - add_varpool_node_to_partition (partition, vnode); - } - else - { - while (varpool_pos < n_varpool_nodes) - { - if (!varpool_order[varpool_pos]->aux) - add_varpool_node_to_partition (partition, varpool_order[varpool_pos]); - varpool_pos++; - } - free (varpool_order); - } - free (order); -} - -/* Promote variable VNODE to be static. */ - -static bool -promote_var (struct varpool_node *vnode) -{ - if (TREE_PUBLIC (vnode->decl) || DECL_EXTERNAL (vnode->decl)) - return false; - gcc_assert (flag_wpa); - TREE_PUBLIC (vnode->decl) = 1; - DECL_VISIBILITY (vnode->decl) = VISIBILITY_HIDDEN; - DECL_VISIBILITY_SPECIFIED (vnode->decl) = true; - if (cgraph_dump_file) - fprintf (cgraph_dump_file, - "Promoting var as hidden: %s\n", varpool_node_name (vnode)); - return true; -} - -/* Promote function NODE to be static. */ - -static bool -promote_fn (struct cgraph_node *node) -{ - gcc_assert (flag_wpa); - if (TREE_PUBLIC (node->decl) || DECL_EXTERNAL (node->decl)) - return false; - TREE_PUBLIC (node->decl) = 1; - DECL_VISIBILITY (node->decl) = VISIBILITY_HIDDEN; - DECL_VISIBILITY_SPECIFIED (node->decl) = true; - if (cgraph_dump_file) - fprintf (cgraph_dump_file, - "Promoting function as hidden: %s/%i\n", - cgraph_node_name (node), node->uid); - return true; -} - -/* Find out all static decls that need to be promoted to global because - of cross file sharing. This function must be run in the WPA mode after - all inlinees are added. */ - -static void -lto_promote_cross_file_statics (void) -{ - struct varpool_node *vnode; - unsigned i, n_sets; - cgraph_node_set set; - varpool_node_set vset; - cgraph_node_set_iterator csi; - varpool_node_set_iterator vsi; - VEC(varpool_node_ptr, heap) *promoted_initializers = NULL; - struct pointer_set_t *inserted = pointer_set_create (); - - gcc_assert (flag_wpa); - - n_sets = VEC_length (ltrans_partition, ltrans_partitions); - for (i = 0; i < n_sets; i++) - { - ltrans_partition part - = VEC_index (ltrans_partition, ltrans_partitions, i); - set = part->cgraph_set; - vset = part->varpool_set; - - /* If node called or referred to from other partition, it needs to be - globalized. */ - for (csi = csi_start (set); !csi_end_p (csi); csi_next (&csi)) - { - struct cgraph_node *node = csi_node (csi); - if (node->local.externally_visible) - continue; - if (node->global.inlined_to) - continue; - if ((!DECL_EXTERNAL (node->decl) && !DECL_COMDAT (node->decl)) - && (referenced_from_other_partition_p (&node->ref_list, set, vset) - || reachable_from_other_partition_p (node, set))) - promote_fn (node); - } - for (vsi = vsi_start (vset); !vsi_end_p (vsi); vsi_next (&vsi)) - { - vnode = vsi_node (vsi); - /* Constant pool references use internal labels and thus can not - be made global. It is sensible to keep those ltrans local to - allow better optimization. */ - if (!DECL_IN_CONSTANT_POOL (vnode->decl) && !DECL_COMDAT (vnode->decl) - && !vnode->externally_visible && vnode->analyzed - && referenced_from_other_partition_p (&vnode->ref_list, - set, vset)) - promote_var (vnode); - } - - /* We export the initializer of a read-only var into each partition - referencing the var. Folding might take declarations from the - initializer and use them, so everything referenced from the - initializer can be accessed from this partition after folding. - - This means that we need to promote all variables and functions - referenced from all initializers of read-only vars referenced - from this partition that are not in this partition. This needs - to be done recursively. */ - for (vnode = varpool_nodes; vnode; vnode = vnode->next) - if (const_value_known_p (vnode->decl) - && DECL_INITIAL (vnode->decl) - && !varpool_node_in_set_p (vnode, vset) - && referenced_from_this_partition_p (&vnode->ref_list, set, vset) - && !pointer_set_insert (inserted, vnode)) - VEC_safe_push (varpool_node_ptr, heap, promoted_initializers, vnode); - - while (!VEC_empty (varpool_node_ptr, promoted_initializers)) - { - int i; - struct ipa_ref *ref; - - vnode = VEC_pop (varpool_node_ptr, promoted_initializers); - for (i = 0; - ipa_ref_list_reference_iterate (&vnode->ref_list, i, ref); - i++) - { - if (ref->refered_type == IPA_REF_CGRAPH) - { - struct cgraph_node *n = ipa_ref_node (ref); - gcc_assert (!n->global.inlined_to); - if (!n->local.externally_visible - && !cgraph_node_in_set_p (n, set)) - promote_fn (n); - } - else - { - struct varpool_node *v = ipa_ref_varpool_node (ref); - if (varpool_node_in_set_p (v, vset)) - continue; - - /* Constant pool references use internal labels and thus - cannot be made global. It is sensible to keep those - ltrans local to allow better optimization. */ - if (DECL_IN_CONSTANT_POOL (v->decl)) - { - if (!pointer_set_insert (inserted, vnode)) - VEC_safe_push (varpool_node_ptr, heap, - promoted_initializers, v); - } - else if (!v->externally_visible && v->analyzed) - { - if (promote_var (v) - && DECL_INITIAL (v->decl) - && const_value_known_p (v->decl) - && !pointer_set_insert (inserted, vnode)) - VEC_safe_push (varpool_node_ptr, heap, - promoted_initializers, v); - } - } - } - } - } - pointer_set_destroy (inserted); -} - static lto_file *current_lto_file; /* Helper for qsort; compare partitions and return one with smaller size. |