aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Feng <ef2648@columbia.edu>2023-08-02 16:54:55 -0400
committerDavid Malcolm <dmalcolm@redhat.com>2023-08-02 16:58:58 -0400
commitfafe2d18f791c6b97b49af7c84b1b5703681c3af (patch)
tree2024b0d26ed3b846b9b6958004081dcb5a7459ed
parent41ef5a34161356817807be3a2e51fbdbe575ae85 (diff)
downloadgcc-fafe2d18f791c6b97b49af7c84b1b5703681c3af.zip
gcc-fafe2d18f791c6b97b49af7c84b1b5703681c3af.tar.gz
gcc-fafe2d18f791c6b97b49af7c84b1b5703681c3af.tar.bz2
analyzer: stash values for CPython plugin [PR107646]
This patch adds a hook to the end of ana::on_finish_translation_unit which calls relevant stashing-related callbacks registered during plugin initialization. This feature is used to stash named types and global variables for a CPython analyzer plugin [PR107646]. gcc/analyzer/ChangeLog: PR analyzer/107646 * analyzer-language.cc (run_callbacks): New function. (on_finish_translation_unit): New function. * analyzer-language.h (GCC_ANALYZER_LANGUAGE_H): New include. (class translation_unit): New vfuncs. gcc/c/ChangeLog: PR analyzer/107646 * c-parser.cc: New functions on stashing values for the analyzer. gcc/testsuite/ChangeLog: PR analyzer/107646 * gcc.dg/plugin/plugin.exp: Add new plugin and test. * gcc.dg/plugin/analyzer_cpython_plugin.c: New plugin. * gcc.dg/plugin/cpython-plugin-test-1.c: New test. Signed-off-by: Eric Feng <ef2648@columbia.edu>
-rw-r--r--gcc/analyzer/analyzer-language.cc22
-rw-r--r--gcc/analyzer/analyzer-language.h9
-rw-r--r--gcc/c/c-parser.cc24
-rw-r--r--gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c230
-rw-r--r--gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-1.c8
-rw-r--r--gcc/testsuite/gcc.dg/plugin/plugin.exp2
6 files changed, 295 insertions, 0 deletions
diff --git a/gcc/analyzer/analyzer-language.cc b/gcc/analyzer/analyzer-language.cc
index 2c891090..8540028 100644
--- a/gcc/analyzer/analyzer-language.cc
+++ b/gcc/analyzer/analyzer-language.cc
@@ -35,6 +35,26 @@ static GTY (()) hash_map <tree, tree> *analyzer_stashed_constants;
#if ENABLE_ANALYZER
namespace ana {
+static vec<finish_translation_unit_callback>
+ *finish_translation_unit_callbacks;
+
+void
+register_finish_translation_unit_callback (
+ finish_translation_unit_callback callback)
+{
+ if (!finish_translation_unit_callbacks)
+ vec_alloc (finish_translation_unit_callbacks, 1);
+ finish_translation_unit_callbacks->safe_push (callback);
+}
+
+static void
+run_callbacks (logger *logger, const translation_unit &tu)
+{
+ for (auto const &cb : finish_translation_unit_callbacks)
+ {
+ cb (logger, tu);
+ }
+}
/* Call into TU to try to find a value for NAME.
If found, stash its value within analyzer_stashed_constants. */
@@ -102,6 +122,8 @@ on_finish_translation_unit (const translation_unit &tu)
the_logger.set_logger (new logger (logfile, 0, 0,
*global_dc->printer));
stash_named_constants (the_logger.get_logger (), tu);
+
+ run_callbacks (the_logger.get_logger (), tu);
}
/* Lookup NAME in the named constants stashed when the frontend TU finished.
diff --git a/gcc/analyzer/analyzer-language.h b/gcc/analyzer/analyzer-language.h
index 00f85ab..8deea52 100644
--- a/gcc/analyzer/analyzer-language.h
+++ b/gcc/analyzer/analyzer-language.h
@@ -21,6 +21,8 @@ along with GCC; see the file COPYING3. If not see
#ifndef GCC_ANALYZER_LANGUAGE_H
#define GCC_ANALYZER_LANGUAGE_H
+#include "analyzer/analyzer-logging.h"
+
#if ENABLE_ANALYZER
namespace ana {
@@ -35,8 +37,15 @@ class translation_unit
have been seen). If it is defined and an integer (e.g. either as a
macro or enum), return the INTEGER_CST value, otherwise return NULL. */
virtual tree lookup_constant_by_id (tree id) const = 0;
+ virtual tree lookup_type_by_id (tree id) const = 0;
+ virtual tree lookup_global_var_by_id (tree id) const = 0;
};
+typedef void (*finish_translation_unit_callback)
+ (logger *, const translation_unit &);
+void register_finish_translation_unit_callback (
+ finish_translation_unit_callback callback);
+
/* Analyzer hook for frontends to call at the end of the TU. */
void on_finish_translation_unit (const translation_unit &tu);
diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index cf82b03..025817f 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -1695,6 +1695,30 @@ public:
return NULL_TREE;
}
+ tree
+ lookup_type_by_id (tree id) const final override
+ {
+ if (tree type_decl = lookup_name (id))
+ if (TREE_CODE (type_decl) == TYPE_DECL)
+ {
+ tree record_type = TREE_TYPE (type_decl);
+ if (TREE_CODE (record_type) == RECORD_TYPE)
+ return record_type;
+ }
+
+ return NULL_TREE;
+ }
+
+ tree
+ lookup_global_var_by_id (tree id) const final override
+ {
+ if (tree var_decl = lookup_name (id))
+ if (TREE_CODE (var_decl) == VAR_DECL)
+ return var_decl;
+
+ return NULL_TREE;
+ }
+
private:
/* Attempt to get an INTEGER_CST from MACRO.
Only handle the simplest cases: where MACRO's definition is a single
diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
new file mode 100644
index 0000000..9ecc42d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
@@ -0,0 +1,230 @@
+/* -fanalyzer plugin for CPython extension modules */
+/* { dg-options "-g" } */
+
+#define INCLUDE_MEMORY
+#include "gcc-plugin.h"
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "function.h"
+#include "basic-block.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "diagnostic-core.h"
+#include "graphviz.h"
+#include "options.h"
+#include "cgraph.h"
+#include "tree-dfa.h"
+#include "stringpool.h"
+#include "convert.h"
+#include "target.h"
+#include "fold-const.h"
+#include "tree-pretty-print.h"
+#include "diagnostic-color.h"
+#include "diagnostic-metadata.h"
+#include "tristate.h"
+#include "bitmap.h"
+#include "selftest.h"
+#include "function.h"
+#include "json.h"
+#include "analyzer/analyzer.h"
+#include "analyzer/analyzer-language.h"
+#include "analyzer/analyzer-logging.h"
+#include "ordered-hash-map.h"
+#include "options.h"
+#include "cgraph.h"
+#include "cfg.h"
+#include "digraph.h"
+#include "analyzer/supergraph.h"
+#include "sbitmap.h"
+#include "analyzer/call-string.h"
+#include "analyzer/program-point.h"
+#include "analyzer/store.h"
+#include "analyzer/region-model.h"
+#include "analyzer/call-details.h"
+#include "analyzer/call-info.h"
+#include "make-unique.h"
+
+int plugin_is_GPL_compatible;
+
+#if ENABLE_ANALYZER
+static GTY (()) hash_map<tree, tree> *analyzer_stashed_types;
+static GTY (()) hash_map<tree, tree> *analyzer_stashed_globals;
+
+namespace ana
+{
+static tree pyobj_record = NULL_TREE;
+static tree varobj_record = NULL_TREE;
+static tree pylistobj_record = NULL_TREE;
+static tree pylongobj_record = NULL_TREE;
+static tree pylongtype_vardecl = NULL_TREE;
+static tree pylisttype_vardecl = NULL_TREE;
+
+static tree
+get_field_by_name (tree type, const char *name)
+{
+ for (tree field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL)
+ {
+ const char *field_name = IDENTIFIER_POINTER (DECL_NAME (field));
+ if (strcmp (field_name, name) == 0)
+ return field;
+ }
+ }
+ return NULL_TREE;
+}
+
+static void
+maybe_stash_named_type (logger *logger, const translation_unit &tu,
+ const char *name)
+{
+ LOG_FUNC_1 (logger, "name: %qs", name);
+ if (!analyzer_stashed_types)
+ analyzer_stashed_types = hash_map<tree, tree>::create_ggc ();
+
+ tree id = get_identifier (name);
+ if (tree t = tu.lookup_type_by_id (id))
+ {
+ gcc_assert (TREE_CODE (t) == RECORD_TYPE);
+ analyzer_stashed_types->put (id, t);
+ if (logger)
+ logger->log ("found %qs: %qE", name, t);
+ }
+ else
+ {
+ if (logger)
+ logger->log ("%qs: not found", name);
+ }
+}
+
+static void
+maybe_stash_global_var (logger *logger, const translation_unit &tu,
+ const char *name)
+{
+ LOG_FUNC_1 (logger, "name: %qs", name);
+ if (!analyzer_stashed_globals)
+ analyzer_stashed_globals = hash_map<tree, tree>::create_ggc ();
+
+ tree id = get_identifier (name);
+ if (tree t = tu.lookup_global_var_by_id (id))
+ {
+ gcc_assert (TREE_CODE (t) == VAR_DECL);
+ analyzer_stashed_globals->put (id, t);
+ if (logger)
+ logger->log ("found %qs: %qE", name, t);
+ }
+ else
+ {
+ if (logger)
+ logger->log ("%qs: not found", name);
+ }
+}
+
+static void
+stash_named_types (logger *logger, const translation_unit &tu)
+{
+ LOG_SCOPE (logger);
+
+ maybe_stash_named_type (logger, tu, "PyObject");
+ maybe_stash_named_type (logger, tu, "PyListObject");
+ maybe_stash_named_type (logger, tu, "PyVarObject");
+ maybe_stash_named_type (logger, tu, "PyLongObject");
+}
+
+static void
+stash_global_vars (logger *logger, const translation_unit &tu)
+{
+ LOG_SCOPE (logger);
+
+ maybe_stash_global_var (logger, tu, "PyLong_Type");
+ maybe_stash_global_var (logger, tu, "PyList_Type");
+}
+
+static tree
+get_stashed_type_by_name (const char *name)
+{
+ if (!analyzer_stashed_types)
+ return NULL_TREE;
+ tree id = get_identifier (name);
+ if (tree *slot = analyzer_stashed_types->get (id))
+ {
+ gcc_assert (TREE_CODE (*slot) == RECORD_TYPE);
+ return *slot;
+ }
+ return NULL_TREE;
+}
+
+static tree
+get_stashed_global_var_by_name (const char *name)
+{
+ if (!analyzer_stashed_globals)
+ return NULL_TREE;
+ tree id = get_identifier (name);
+ if (tree *slot = analyzer_stashed_globals->get (id))
+ {
+ gcc_assert (TREE_CODE (*slot) == VAR_DECL);
+ return *slot;
+ }
+ return NULL_TREE;
+}
+
+static void
+init_py_structs ()
+{
+ pyobj_record = get_stashed_type_by_name ("PyObject");
+ varobj_record = get_stashed_type_by_name ("PyVarObject");
+ pylistobj_record = get_stashed_type_by_name ("PyListObject");
+ pylongobj_record = get_stashed_type_by_name ("PyLongObject");
+ pylongtype_vardecl = get_stashed_global_var_by_name ("PyLong_Type");
+ pylisttype_vardecl = get_stashed_global_var_by_name ("PyList_Type");
+}
+
+void
+sorry_no_cpython_plugin ()
+{
+ sorry ("%qs definitions not found."
+ " Please ensure to %qs.)",
+ "Python/C API", "#include <Python.h>");
+}
+
+static void
+cpython_analyzer_init_cb (void *gcc_data, void * /*user_data */)
+{
+ ana::plugin_analyzer_init_iface *iface
+ = (ana::plugin_analyzer_init_iface *)gcc_data;
+ LOG_SCOPE (iface->get_logger ());
+ if (0)
+ inform (input_location, "got here: cpython_analyzer_init_cb");
+
+ init_py_structs ();
+
+ if (pyobj_record == NULL_TREE)
+ {
+ sorry_no_cpython_plugin ();
+ return;
+ }
+}
+} // namespace ana
+
+#endif /* #if ENABLE_ANALYZER */
+
+int
+plugin_init (struct plugin_name_args *plugin_info,
+ struct plugin_gcc_version *version)
+{
+#if ENABLE_ANALYZER
+ const char *plugin_name = plugin_info->base_name;
+ if (0)
+ inform (input_location, "got here; %qs", plugin_name);
+ ana::register_finish_translation_unit_callback (&stash_named_types);
+ ana::register_finish_translation_unit_callback (&stash_global_vars);
+ register_callback (plugin_info->base_name, PLUGIN_ANALYZER_INIT,
+ ana::cpython_analyzer_init_cb,
+ NULL); /* void *user_data */
+#else
+ sorry_no_analyzer ();
+#endif
+ return 0;
+} \ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-1.c b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-1.c
new file mode 100644
index 0000000..c105074
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-fanalyzer" } */
+/* { dg-require-effective-target analyzer } */
+/* { dg-message "'Python/C API' definitions not found. Please ensure to '#include <Python.h>'." "" { target *-*-* } 0 } */
+
+void test_no_python_plugin ()
+{
+} \ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp
index 60723a2..09c4539 100644
--- a/gcc/testsuite/gcc.dg/plugin/plugin.exp
+++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp
@@ -160,6 +160,8 @@ set plugin_test_list [list \
taint-CVE-2011-0521-5-fixed.c \
taint-CVE-2011-0521-6.c \
taint-antipatterns-1.c } \
+ { analyzer_cpython_plugin.c \
+ cpython-plugin-test-1.c } \
]
foreach plugin_test $plugin_test_list {