author     David Malcolm <dmalcolm@redhat.com>   2020-08-18 18:52:17 -0400
committer  David Malcolm <dmalcolm@redhat.com>   2020-09-16 19:01:58 -0400
commit     b28491dc2d79763ecbff4f0b9f1f3e48a443be1d (patch)
tree       e65d9b4badac7ef5e1bfffd03dc6510c6594134b /gcc
parent     b9b5fc0c2175b34131d9fd0805b1b307f754f4f0 (diff)
analyzer: bulk merger/processing of runs of nodes at CFG join points
Prior to this patch the analyzer worklist considered only one or two
nodes at a time, processing and/or merging state individually or
pairwise.  This could lead to explosions of merger nodes at CFG join
points, especially after switch statements, which can have large
numbers of in-edges; large numbers of merger exploded_nodes could then
be created, exceeding the per-point limit and stopping analysis with
-Wanalyzer-too-complex.

This patch special-cases the handling of runs of consecutive nodes in
the worklist at a CFG join point, processing and merging them all
together.

The patch fixes a state explosion seen in bzip2.c when attempting to
reproduce PR analyzer/95188, in a switch statement within a loop for
argument parsing.  With this patch, the analyzer successfully
consolidates the state after the argument parsing to a single exploded
node.

In gcc.dg/analyzer/pr96653.c there is a switch statement with over 300
cases which leads to hitting the per-point limit.  With this patch the
consolidation code doesn't manage to merge all of them, due to other
worklist-ordering bugs, and it still hits the per-point limits, but it
does manage some very long consolidations:
  merged 2 in-enodes into 2 out-enode(s) at SN: 403
  merged 2 in-enodes into 2 out-enode(s) at SN: 403
  merged 2 in-enodes into 1 out-enode(s) at SN: 11
  merged 29 in-enodes into 1 out-enode(s) at SN: 35
  merged 6 in-enodes into 1 out-enode(s) at SN: 41
  merged 31 in-enodes into 1 out-enode(s) at SN: 35
and with a followup patch to fix an SCC issue it manages:
  merged 358 in-enodes into 2 out-enode(s) at SN: 402

The patch appears to fix the failure on non-x86_64 of:
  gcc.dg/analyzer/pr93032-mztools.c (test for excess errors)
which is PR analyzer/96616.

Unfortunately, the patch introduces a memory-leak false positive in
gcc.dg/analyzer/pr94851-1.c, but this appears to be a pre-existing bug
that was hidden by state-merging failures.

gcc/analyzer/ChangeLog:
        * engine.cc (exploded_node::dump_dot): Show STATUS_BULK_MERGED.
        (exploded_graph::process_worklist): Call
        maybe_process_run_of_before_supernode_enodes.
        (exploded_graph::maybe_process_run_of_before_supernode_enodes):
        New.
        (exploded_graph_annotator::print_enode): Show STATUS_BULK_MERGED.
        * exploded-graph.h (enum exploded_node::status): Add
        STATUS_BULK_MERGED.

gcc/testsuite/ChangeLog:
        * gcc.dg/analyzer/bzip2-arg-parse-1.c: New test.
        * gcc.dg/analyzer/loop-n-down-to-1-by-1.c: Remove xfail.
        * gcc.dg/analyzer/pr94851-1.c: Add xfail.
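The consolidation described above boils down to a greedy partition of
the successor states: each state is merged into the first
already-merged state it is compatible with, and otherwise opens a new
merged state.  The standalone C++ sketch below illustrates only that
partitioning idea; "State", try_merge and partition_by_merger are
hypothetical stand-ins for the analyzer's program_state,
can_merge_with_p and the loop in
maybe_process_run_of_before_supernode_enodes, not the committed
implementation.

/* Standalone sketch of the greedy partitioning step; the types and
   helpers here are placeholders, not the analyzer's real API.  */
#include <cstdio>
#include <optional>
#include <vector>

using State = int;

/* Toy merge test: states are compatible only if equal.  The real
   analyzer can also widen values when merging.  */
static std::optional<State>
try_merge (State a, State b)
{
  if (a == b)
    return a;
  return std::nullopt;
}

/* Partition INPUTS into a (hopefully much smaller) set of merged
   states; MERGER_IDX_FOR_INPUT records which bucket each input
   joined.  */
static std::vector<State>
partition_by_merger (const std::vector<State> &inputs,
                     std::vector<int> &merger_idx_for_input)
{
  std::vector<State> merged;
  for (State s : inputs)
    {
      int idx = -1;
      for (size_t m = 0; m < merged.size (); ++m)
        if (std::optional<State> combined = try_merge (s, merged[m]))
          {
            merged[m] = *combined;  /* Reuse an existing bucket.  */
            idx = (int) m;
            break;
          }
      if (idx == -1)
        {
          idx = (int) merged.size ();
          merged.push_back (s);     /* Start a new bucket.  */
        }
      merger_idx_for_input.push_back (idx);
    }
  return merged;
}

int
main ()
{
  std::vector<State> inputs = {1, 2, 1, 3, 2, 1};
  std::vector<int> idx;
  std::vector<State> merged = partition_by_merger (inputs, idx);
  std::printf ("merged %zu in-states into %zu out-state(s)\n",
               inputs.size (), merged.size ());
  return 0;
}

Running the sketch prints "merged 6 in-states into 3 out-state(s)",
echoing the log format quoted above; with a merge test that can widen
values, the ideal outcome is a single out-state.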
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/analyzer/engine.cc                                 | 207
-rw-r--r--  gcc/analyzer/exploded-graph.h                          |   4
-rw-r--r--  gcc/testsuite/gcc.dg/analyzer/bzip2-arg-parse-1.c      |  95
-rw-r--r--  gcc/testsuite/gcc.dg/analyzer/loop-n-down-to-1-by-1.c  |   4
-rw-r--r--  gcc/testsuite/gcc.dg/analyzer/pr94851-1.c              |   3
5 files changed, 310 insertions, 3 deletions
diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc
index 5903b19..53fafb5 100644
--- a/gcc/analyzer/engine.cc
+++ b/gcc/analyzer/engine.cc
@@ -848,6 +848,8 @@ exploded_node::dump_dot (graphviz_out *gv, const dump_args_t &args) const
pp_printf (pp, "EN: %i", m_index);
if (m_status == STATUS_MERGER)
pp_string (pp, " (merger)");
+ else if (m_status == STATUS_BULK_MERGED)
+ pp_string (pp, " (bulk merged)");
pp_newline (pp);
if (args.show_enode_details_p (*this))
@@ -2211,6 +2213,12 @@ exploded_graph::process_worklist ()
if (logger)
logger->log ("next to process: EN: %i", node->m_index);
+ /* If we have a run of nodes that are before-supernode, try merging and
+ processing them together, rather than pairwise or individually. */
+ if (flag_analyzer_state_merge && node != m_origin)
+ if (maybe_process_run_of_before_supernode_enodes (node))
+ goto handle_limit;
+
/* Avoid exponential explosions of nodes by attempting to merge
nodes that are at the same program point and which have
sufficiently similar state. */
@@ -2340,6 +2348,7 @@ exploded_graph::process_worklist ()
process_node (node);
+ handle_limit:
/* Impose a hard limit on the number of exploded nodes, to ensure
that the analysis terminates in the face of pathological state
explosion (or bugs).
@@ -2367,6 +2376,201 @@ exploded_graph::process_worklist ()
}
}
+/* Attempt to process a consecutive run of sufficiently-similar nodes in
+ the worklist at a CFG join-point (having already popped ENODE from the
+ head of the worklist).
+
+ If ENODE's point is of the form (before-supernode, SNODE) and the next
+ nodes in the worklist are a consecutive run of enodes of the same form,
+ for the same supernode as ENODE (but potentially from different in-edges),
+ process them all together, setting their status to STATUS_BULK_MERGED,
+ and return true.
+ Otherwise, return false, in which case ENODE must be processed in the
+ normal way.
+
+ When processing them all together, generate successor states based
+ on phi nodes for the appropriate CFG edges, and then attempt to merge
+ these states into a minimal set of merged successor states, partitioning
+ the inputs by merged successor state.
+
+ Create new exploded nodes for all of the merged states, and add edges
+ connecting the input enodes to the corresponding merger exploded nodes.
+
+ We hope we have a much smaller number of merged successor states
+ compared to the number of input enodes - ideally just one,
+ if all successor states can be merged.
+
+ Processing and merging many together as one operation rather than as
+ pairs avoids scaling issues where per-pair mergers could bloat the
+ graph with merger nodes (especially so after switch statements). */
+
+bool
+exploded_graph::
+maybe_process_run_of_before_supernode_enodes (exploded_node *enode)
+{
+ /* A struct for tracking per-input state. */
+ struct item
+ {
+ item (exploded_node *input_enode)
+ : m_input_enode (input_enode),
+ m_processed_state (input_enode->get_state ()),
+ m_merger_idx (-1)
+ {}
+
+ exploded_node *m_input_enode;
+ program_state m_processed_state;
+ int m_merger_idx;
+ };
+
+ gcc_assert (enode->get_status () == exploded_node::STATUS_WORKLIST);
+ gcc_assert (enode->m_succs.length () == 0);
+
+ const program_point &point = enode->get_point ();
+
+ if (point.get_kind () != PK_BEFORE_SUPERNODE)
+ return false;
+
+ const supernode *snode = point.get_supernode ();
+
+ logger * const logger = get_logger ();
+ LOG_SCOPE (logger);
+
+ /* Find a run of enodes in the worklist that are before the same supernode,
+ but potentially from different in-edges. */
+ auto_vec <exploded_node *> enodes;
+ enodes.safe_push (enode);
+ while (exploded_node *enode_2 = m_worklist.peek_next ())
+ {
+ gcc_assert (enode_2->get_status ()
+ == exploded_node::STATUS_WORKLIST);
+ gcc_assert (enode_2->m_succs.length () == 0);
+
+ const program_point &point_2 = enode_2->get_point ();
+
+ if (point_2.get_kind () == PK_BEFORE_SUPERNODE
+ && point_2.get_supernode () == snode
+ && point_2.get_call_string () == point.get_call_string ())
+ {
+ enodes.safe_push (enode_2);
+ m_worklist.take_next ();
+ }
+ else
+ break;
+ }
+
+ /* If the only node is ENODE, then give up. */
+ if (enodes.length () == 1)
+ return false;
+
+ if (logger)
+ logger->log ("got run of %i enodes for SN: %i",
+ enodes.length (), snode->m_index);
+
+ /* All of these enodes have a shared successor point (even if they
+ were for different in-edges). */
+ program_point next_point (point.get_next ());
+
+ /* Calculate the successor state for each enode in enodes. */
+ auto_delete_vec<item> items (enodes.length ());
+ unsigned i;
+ exploded_node *iter_enode;
+ FOR_EACH_VEC_ELT (enodes, i, iter_enode)
+ {
+ item *it = new item (iter_enode);
+ items.quick_push (it);
+ const program_state &state = iter_enode->get_state ();
+ program_state *next_state = &it->m_processed_state;
+ const program_point &iter_point = iter_enode->get_point ();
+ if (const superedge *iter_sedge = iter_point.get_from_edge ())
+ {
+ impl_region_model_context ctxt (*this, iter_enode,
+ &state, next_state, NULL);
+ const cfg_superedge *last_cfg_superedge
+ = iter_sedge->dyn_cast_cfg_superedge ();
+ if (last_cfg_superedge)
+ next_state->m_region_model->update_for_phis
+ (snode, last_cfg_superedge, &ctxt);
+ }
+ }
+
+ /* Attempt to partition the items into a set of merged states.
+ We hope we have a much smaller number of merged states
+ compared to the number of input enodes - ideally just one,
+ if all can be merged. */
+ auto_delete_vec <program_state> merged_states;
+ auto_vec<item *> first_item_for_each_merged_state;
+ item *it;
+ FOR_EACH_VEC_ELT (items, i, it)
+ {
+ const program_state &it_state = it->m_processed_state;
+ program_state *merged_state;
+ unsigned iter_merger_idx;
+ FOR_EACH_VEC_ELT (merged_states, iter_merger_idx, merged_state)
+ {
+ program_state merge (m_ext_state);
+ if (it_state.can_merge_with_p (*merged_state, next_point, &merge))
+ {
+ *merged_state = merge;
+ it->m_merger_idx = iter_merger_idx;
+ if (logger)
+ logger->log ("reusing merger state %i for item %i (EN: %i)",
+ it->m_merger_idx, i, it->m_input_enode->m_index);
+ goto got_merger;
+ }
+ }
+ /* If it couldn't be merged with any existing merged_states,
+ create a new one. */
+ if (it->m_merger_idx == -1)
+ {
+ it->m_merger_idx = merged_states.length ();
+ merged_states.safe_push (new program_state (it_state));
+ first_item_for_each_merged_state.safe_push (it);
+ if (logger)
+ logger->log ("using new merger state %i for item %i (EN: %i)",
+ it->m_merger_idx, i, it->m_input_enode->m_index);
+ }
+ got_merger:
+ gcc_assert (it->m_merger_idx >= 0);
+ gcc_assert (it->m_merger_idx < merged_states.length ());
+ }
+
+ /* Create merger nodes. */
+ auto_vec<exploded_node *> next_enodes (merged_states.length ());
+ program_state *merged_state;
+ FOR_EACH_VEC_ELT (merged_states, i, merged_state)
+ {
+ exploded_node *src_enode
+ = first_item_for_each_merged_state[i]->m_input_enode;
+ exploded_node *next
+ = get_or_create_node (next_point, *merged_state, src_enode);
+ /* "next" could be NULL; we handle that when adding the edges below. */
+ next_enodes.quick_push (next);
+ if (logger)
+ {
+ if (next)
+ logger->log ("using EN: %i for merger state %i", next->m_index, i);
+ else
+ logger->log ("using NULL enode for merger state %i", i);
+ }
+ }
+
+ /* Create edges from each input enode to the appropriate successor enode.
+ Update the status of the now-processed input enodes. */
+ FOR_EACH_VEC_ELT (items, i, it)
+ {
+ exploded_node *next = next_enodes[it->m_merger_idx];
+ if (next)
+ add_edge (it->m_input_enode, next, NULL);
+ it->m_input_enode->set_status (exploded_node::STATUS_BULK_MERGED);
+ }
+
+ if (logger)
+ logger->log ("merged %i in-enodes into %i out-enode(s) at SN: %i",
+ items.length (), merged_states.length (), snode->m_index);
+
+ return true;
+}
+
/* Return true if STMT must appear at the start of its exploded node, and
thus we can't consolidate its effects within a run of other statements,
where PREV_STMT was the previous statement. */
@@ -3944,6 +4148,9 @@ private:
case exploded_node::STATUS_MERGER:
pp_string (pp, "(M)");
break;
+ case exploded_node::STATUS_BULK_MERGED:
+ pp_string (pp, "(BM)");
+ break;
}
gv->end_tdtr ();
/* Dump any saved_diagnostics at this enode. */
diff --git a/gcc/analyzer/exploded-graph.h b/gcc/analyzer/exploded-graph.h
index e36f94e..5d4c319 100644
--- a/gcc/analyzer/exploded-graph.h
+++ b/gcc/analyzer/exploded-graph.h
@@ -161,6 +161,9 @@ class exploded_node : public dnode<eg_traits>
/* Node was left unprocessed due to merger; it won't have had
exploded_graph::process_node called on it. */
STATUS_MERGER,
+
+ /* Node was processed by maybe_process_run_of_before_supernode_enodes. */
+ STATUS_BULK_MERGED
};
exploded_node (const point_and_state &ps, int index);
@@ -730,6 +733,7 @@ public:
void build_initial_worklist ();
void process_worklist ();
+ bool maybe_process_run_of_before_supernode_enodes (exploded_node *node);
void process_node (exploded_node *node);
exploded_node *get_or_create_node (const program_point &point,
diff --git a/gcc/testsuite/gcc.dg/analyzer/bzip2-arg-parse-1.c b/gcc/testsuite/gcc.dg/analyzer/bzip2-arg-parse-1.c
new file mode 100644
index 0000000..1f1d829
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/bzip2-arg-parse-1.c
@@ -0,0 +1,95 @@
+/* Integration test to verify that we don't explode in this
+ argument-parsing logic.
+ Adapted from part of bzip2-1.0.8: bzip2.c: main. */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "analyzer-decls.h"
+
+/* This test file has been heavily modified from the bzip2.c original,
+ which has the following license boilerplate. */
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
+ bzip2/libbzip2 version 1.0.8 of 13 July 2019
+ Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+typedef char Char;
+typedef unsigned char Bool;
+typedef int Int32;
+
+#define True ((Bool)1)
+#define False ((Bool)0)
+
+typedef
+ struct zzzz {
+ Char *name;
+ struct zzzz *link;
+ }
+ Cell;
+
+Int32 verbosity;
+Bool keepInputFiles, smallMode;
+Bool forceOverwrite, noisy;
+Int32 blockSize100k;
+Int32 opMode;
+Int32 srcMode;
+Char *progName;
+
+extern void license ( void );
+extern void usage ( Char *fullProgName );
+
+void test (Cell *argList)
+{
+ Cell *aa;
+ Int32 i, j;
+
+ for (aa = argList; aa != NULL; aa = aa->link) {
+ if (aa->name[0] == '-' && aa->name[1] != '-') {
+ for (j = 1; aa->name[j] != '\0'; j++) {
+ switch (aa->name[j]) {
+ case 'c': srcMode = 2; break;
+ case 'd': opMode = 2; break;
+ case 'z': opMode = 1; break;
+ case 'f': forceOverwrite = True; break;
+ case 't': opMode = 3; break;
+ case 'k': keepInputFiles = True; break;
+ case 's': smallMode = True; break;
+ case 'q': noisy = False; break;
+ case '1': blockSize100k = 1; break;
+ case '2': blockSize100k = 2; break;
+ case '3': blockSize100k = 3; break;
+ case '4': blockSize100k = 4; break;
+ case '5': blockSize100k = 5; break;
+ case '6': blockSize100k = 6; break;
+ case '7': blockSize100k = 7; break;
+ case '8': blockSize100k = 8; break;
+ case '9': blockSize100k = 9; break;
+ case 'V':
+ case 'L': license(); break;
+ case 'v': verbosity++; break;
+ case 'h': usage ( progName );
+ exit ( 0 );
+ break;
+ default: fprintf ( stderr, "%s: Bad flag `%s'\n",
+ progName, aa->name );
+ usage ( progName );
+ exit ( 1 );
+ break;
+ }
+ }
+ }
+ }
+
+ /* The analyzer ought to be able to successfully merge all of the
+ above changes that can reach here into a single state. */
+ __analyzer_dump_exploded_nodes (0); /* { dg-warning "1 processed enode" } */
+}
diff --git a/gcc/testsuite/gcc.dg/analyzer/loop-n-down-to-1-by-1.c b/gcc/testsuite/gcc.dg/analyzer/loop-n-down-to-1-by-1.c
index e02a849d..553cd2f 100644
--- a/gcc/testsuite/gcc.dg/analyzer/loop-n-down-to-1-by-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/loop-n-down-to-1-by-1.c
@@ -24,8 +24,8 @@ void test(int n)
__analyzer_dump_exploded_nodes (0); /* { dg-warning "2 processed enodes" } */
}
- __analyzer_eval (i <= 0); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */
- /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
+ __analyzer_eval (i <= 0); /* { dg-warning "TRUE" "true" } */
+
__analyzer_eval (i == 0); /* { dg-warning "TRUE" "desired" { xfail *-*-* } } */
/* { dg-warning "UNKNOWN" "status quo" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/gcc.dg/analyzer/pr94851-1.c b/gcc/testsuite/gcc.dg/analyzer/pr94851-1.c
index 5657131..da79652 100644
--- a/gcc/testsuite/gcc.dg/analyzer/pr94851-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/pr94851-1.c
@@ -40,7 +40,8 @@ int pamark(void) {
last->m_next = p;
}
- p->m_name = (char)c;
+ p->m_name = (char)c; /* { dg-bogus "leak of 'p'" "bogus leak" { xfail *-*-* } } */
+ // TODO(xfail): related to PR analyzer/97072 and PR analyzer/97074
return 1;
}