diff options
author | Jakub Jelinek <jakub@redhat.com> | 2020-10-28 10:28:18 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@redhat.com> | 2020-10-28 10:29:09 +0100 |
commit | f165ef89c08ddabb19583e45e8a6819f810d95ab (patch) | |
tree | 27d05b9d41f4df1998e9b8e04745d4f32bb7d54a /gcc | |
parent | 279a9ce9d545f65a0bb1bc4564abafabfc25f82d (diff) | |
download | gcc-f165ef89c08ddabb19583e45e8a6819f810d95ab.zip gcc-f165ef89c08ddabb19583e45e8a6819f810d95ab.tar.gz gcc-f165ef89c08ddabb19583e45e8a6819f810d95ab.tar.bz2 |
lto: LTO cgraph support for late declare variant resolution [PR96680]
> I've tried to add the saving/restoring next to ipa refs saving/restoring, as
> the declare variant alt stuff is kind of extension of those, unfortunately
> following doesn't compile, because I need to also write or read a tree there
> (ctx is a portion of DECL_ATTRIBUTES of the base function), but the ipa refs
> write/read back functions don't have arguments that can be used for that.
This patch adds streaming out and in of the omp_declare_variant_alt
hash table (the on-the-side data for the declare_variant_alt cgraph_nodes)
and, for LTO purposes, treats the declare_variant_alt nodes (which have no
body) as if they contained a body that calls all the possible variants.
After IPA, all the calls to these magic declare_variant_alt nodes are
replaced with a call to one of the variants, depending on which one has the
highest score in the context.
2020-10-28 Jakub Jelinek <jakub@redhat.com>
PR lto/96680
gcc/
* lto-streamer.h (omp_lto_output_declare_variant_alt,
omp_lto_input_declare_variant_alt): Declare.
* symtab.c (symtab_node::get_partitioning_class): Return
SYMBOL_DUPLICATE for declare_variant_alt nodes.
* passes.c (ipa_write_summaries): Add declare_variant_alt to
partition.
* lto-cgraph.c (output_refs): Call omp_lto_output_declare_variant_alt
on declare_variant_alt nodes.
(input_refs): Call omp_lto_input_declare_variant_alt on
declare_variant_alt nodes.
* lto-streamer-out.c (output_function): Don't call
collect_block_tree_leafs if DECL_INITIAL is error_mark_node.
(lto_output): Call output_function even for declare_variant_alt
nodes.
* omp-general.c (omp_lto_output_declare_variant_alt,
omp_lto_input_declare_variant_alt): New functions.
gcc/lto/
* lto-common.c (lto_fixup_prevailing_decls): Don't use
LTO_NO_PREVAIL on TREE_LIST's TREE_PURPOSE.
* lto-partition.c (lto_balanced_map): Treat declare_variant_alt
nodes like definitions.
libgomp/
* testsuite/libgomp.c/declare-variant-1.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/lto-cgraph.c | 6 | ||||
-rw-r--r-- | gcc/lto-streamer-out.c | 5 | ||||
-rw-r--r-- | gcc/lto-streamer.h | 6 | ||||
-rw-r--r-- | gcc/lto/lto-common.c | 1 | ||||
-rw-r--r-- | gcc/lto/lto-partition.c | 5 | ||||
-rw-r--r-- | gcc/omp-general.c | 121 | ||||
-rw-r--r-- | gcc/passes.c | 3 | ||||
-rw-r--r-- | gcc/symtab.c | 2 |
8 files changed, 142 insertions, 7 deletions
diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c index 19d4ca5..91900a1 100644 --- a/gcc/lto-cgraph.c +++ b/gcc/lto-cgraph.c @@ -767,6 +767,9 @@ output_refs (lto_symtab_encoder_t encoder) for (int i = 0; node->iterate_reference (i, ref); i++) lto_output_ref (ob, ref, encoder); } + if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node)) + if (cnode->declare_variant_alt) + omp_lto_output_declare_variant_alt (ob, cnode, encoder); } streamer_write_uhwi_stream (ob->main_stream, 0); @@ -1608,6 +1611,9 @@ input_refs (class lto_input_block *ib, input_ref (ib, node, nodes); count--; } + if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node)) + if (cnode->declare_variant_alt) + omp_lto_input_declare_variant_alt (ib, cnode, nodes); } } diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c index 7882c89..0ca2796 100644 --- a/gcc/lto-streamer-out.c +++ b/gcc/lto-streamer-out.c @@ -2424,7 +2424,7 @@ output_function (struct cgraph_node *node) /* As we do not recurse into BLOCK_SUBBLOCKS but only BLOCK_SUPERCONTEXT collect block tree leafs and stream those. */ auto_vec<tree> block_tree_leafs; - if (DECL_INITIAL (function)) + if (DECL_INITIAL (function) && DECL_INITIAL (function) != error_mark_node) collect_block_tree_leafs (DECL_INITIAL (function), block_tree_leafs); streamer_write_uhwi (ob, block_tree_leafs.length ()); for (unsigned i = 0; i < block_tree_leafs.length (); ++i) @@ -2788,7 +2788,8 @@ lto_output (void) && flag_incremental_link != INCREMENTAL_LINK_LTO) /* Thunks have no body but they may be synthetized at WPA time. 
*/ - || DECL_ARGUMENTS (cnode->decl))) + || DECL_ARGUMENTS (cnode->decl) + || cnode->declare_variant_alt)) output_function (cnode); else if ((vnode = dyn_cast <varpool_node *> (snode)) && (DECL_INITIAL (vnode->decl) != error_mark_node diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h index b465a5e..c75a8b2 100644 --- a/gcc/lto-streamer.h +++ b/gcc/lto-streamer.h @@ -927,6 +927,12 @@ bool reachable_from_this_partition_p (struct cgraph_node *, lto_symtab_encoder_t compute_ltrans_boundary (lto_symtab_encoder_t encoder); void select_what_to_stream (void); +/* In omp-general.c. */ +void omp_lto_output_declare_variant_alt (lto_simple_output_block *, + cgraph_node *, lto_symtab_encoder_t); +void omp_lto_input_declare_variant_alt (lto_input_block *, cgraph_node *, + vec<symtab_node *>); + /* In options-save.c. */ void cl_target_option_stream_out (struct output_block *, struct bitpack_d *, struct cl_target_option *); diff --git a/gcc/lto/lto-common.c b/gcc/lto/lto-common.c index 3ca0fd8..6944c46 100644 --- a/gcc/lto/lto-common.c +++ b/gcc/lto/lto-common.c @@ -2592,7 +2592,6 @@ lto_fixup_prevailing_decls (tree t) case TREE_LIST: LTO_SET_PREVAIL (TREE_VALUE (t)); LTO_SET_PREVAIL (TREE_PURPOSE (t)); - LTO_NO_PREVAIL (TREE_PURPOSE (t)); break; default: gcc_unreachable (); diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c index 7c15181..cc721f9 100644 --- a/gcc/lto/lto-partition.c +++ b/gcc/lto/lto-partition.c @@ -593,7 +593,8 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) last_visited_node++; - gcc_assert (node->definition || node->weakref); + gcc_assert (node->definition || node->weakref + || node->declare_variant_alt); /* Compute boundary cost of callgraph edges. 
*/ for (edge = node->callees; edge; edge = edge->next_callee) @@ -704,7 +705,7 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) int index; node = dyn_cast <cgraph_node *> (ref->referring); - gcc_assert (node->definition); + gcc_assert (node->definition || node->declare_variant_alt); index = lto_symtab_encoder_lookup (partition->encoder, node); if (index != LCC_NOT_FOUND diff --git a/gcc/omp-general.c b/gcc/omp-general.c index b70e3e3..b66dfb5 100644 --- a/gcc/omp-general.c +++ b/gcc/omp-general.c @@ -42,6 +42,8 @@ along with GCC; see the file COPYING3. If not see #include "tree-pass.h" #include "omp-device-properties.h" #include "tree-iterator.h" +#include "data-streamer.h" +#include "streamer-hooks.h" enum omp_requires omp_requires_mask; @@ -2337,6 +2339,125 @@ omp_resolve_declare_variant (tree base) ? TREE_PURPOSE (TREE_VALUE (variant1)) : base); } +void +omp_lto_output_declare_variant_alt (lto_simple_output_block *ob, + cgraph_node *node, + lto_symtab_encoder_t encoder) +{ + gcc_assert (node->declare_variant_alt); + + omp_declare_variant_base_entry entry; + entry.base = NULL; + entry.node = node; + entry.variants = NULL; + omp_declare_variant_base_entry *entryp + = omp_declare_variant_alt->find_with_hash (&entry, DECL_UID (node->decl)); + gcc_assert (entryp); + + int nbase = lto_symtab_encoder_lookup (encoder, entryp->base); + gcc_assert (nbase != LCC_NOT_FOUND); + streamer_write_hwi_stream (ob->main_stream, nbase); + + streamer_write_hwi_stream (ob->main_stream, entryp->variants->length ()); + + unsigned int i; + omp_declare_variant_entry *varentry; + FOR_EACH_VEC_SAFE_ELT (entryp->variants, i, varentry) + { + int nvar = lto_symtab_encoder_lookup (encoder, varentry->variant); + gcc_assert (nvar != LCC_NOT_FOUND); + streamer_write_hwi_stream (ob->main_stream, nvar); + + for (widest_int *w = &varentry->score; ; + w = &varentry->score_in_declare_simd_clone) + { + unsigned len = w->get_len (); + streamer_write_hwi_stream (ob->main_stream, len); + 
const HOST_WIDE_INT *val = w->get_val (); + for (unsigned j = 0; j < len; j++) + streamer_write_hwi_stream (ob->main_stream, val[j]); + if (w == &varentry->score_in_declare_simd_clone) + break; + } + + HOST_WIDE_INT cnt = -1; + HOST_WIDE_INT i = varentry->matches ? 1 : 0; + for (tree attr = DECL_ATTRIBUTES (entryp->base->decl); + attr; attr = TREE_CHAIN (attr), i += 2) + { + attr = lookup_attribute ("omp declare variant base", attr); + if (attr == NULL_TREE) + break; + + if (varentry->ctx == TREE_VALUE (TREE_VALUE (attr))) + { + cnt = i; + break; + } + } + + gcc_assert (cnt != -1); + streamer_write_hwi_stream (ob->main_stream, cnt); + } +} + +void +omp_lto_input_declare_variant_alt (lto_input_block *ib, cgraph_node *node, + vec<symtab_node *> nodes) +{ + gcc_assert (node->declare_variant_alt); + omp_declare_variant_base_entry *entryp + = ggc_cleared_alloc<omp_declare_variant_base_entry> (); + entryp->base = dyn_cast<cgraph_node *> (nodes[streamer_read_hwi (ib)]); + entryp->node = node; + unsigned int len = streamer_read_hwi (ib); + vec_alloc (entryp->variants, len); + + for (unsigned int i = 0; i < len; i++) + { + omp_declare_variant_entry varentry; + varentry.variant + = dyn_cast<cgraph_node *> (nodes[streamer_read_hwi (ib)]); + for (widest_int *w = &varentry.score; ; + w = &varentry.score_in_declare_simd_clone) + { + unsigned len2 = streamer_read_hwi (ib); + HOST_WIDE_INT arr[WIDE_INT_MAX_ELTS]; + gcc_assert (len2 <= WIDE_INT_MAX_ELTS); + for (unsigned int j = 0; j < len2; j++) + arr[j] = streamer_read_hwi (ib); + *w = widest_int::from_array (arr, len2, true); + if (w == &varentry.score_in_declare_simd_clone) + break; + } + + HOST_WIDE_INT cnt = streamer_read_hwi (ib); + HOST_WIDE_INT j = 0; + varentry.ctx = NULL_TREE; + varentry.matches = (cnt & 1) ? 
true : false; + cnt &= ~HOST_WIDE_INT_1; + for (tree attr = DECL_ATTRIBUTES (entryp->base->decl); + attr; attr = TREE_CHAIN (attr), j += 2) + { + attr = lookup_attribute ("omp declare variant base", attr); + if (attr == NULL_TREE) + break; + + if (cnt == j) + { + varentry.ctx = TREE_VALUE (TREE_VALUE (attr)); + break; + } + } + gcc_assert (varentry.ctx != NULL_TREE); + entryp->variants->quick_push (varentry); + } + if (omp_declare_variant_alt == NULL) + omp_declare_variant_alt + = hash_table<omp_declare_variant_alt_hasher>::create_ggc (64); + *omp_declare_variant_alt->find_slot_with_hash (entryp, DECL_UID (node->decl), + INSERT) = entryp; +} /* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK macro on gomp-constants.h. We do not check for overflow. */ diff --git a/gcc/passes.c b/gcc/passes.c index 02a47e2..079ad1a 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -2731,7 +2731,8 @@ ipa_write_summaries (void) { struct cgraph_node *node = order[i]; - if (node->definition && node->need_lto_streaming) + if ((node->definition || node->declare_variant_alt) + && node->need_lto_streaming) { if (gimple_has_body_p (node->decl)) lto_prepare_function_for_streaming (node); diff --git a/gcc/symtab.c b/gcc/symtab.c index 067ae2e..9db88fa 100644 --- a/gcc/symtab.c +++ b/gcc/symtab.c @@ -2006,7 +2006,7 @@ symtab_node::get_partitioning_class (void) if (DECL_ABSTRACT_P (decl)) return SYMBOL_EXTERNAL; - if (cnode && cnode->inlined_to) + if (cnode && (cnode->inlined_to || cnode->declare_variant_alt)) return SYMBOL_DUPLICATE; /* Transparent aliases are always duplicated. */ |