/* Proof-of-concept of a -fanalyzer plugin. Detect (some) uses of CPython API outside of the Global Interpreter Lock. https://docs.python.org/3/c-api/init.html#thread-state-and-the-global-interpreter-lock */ /* { dg-options "-g" } */ #define INCLUDE_MEMORY #define INCLUDE_VECTOR #include "gcc-plugin.h" #include "config.h" #include "system.h" #include "coretypes.h" #include "diagnostic.h" #include "tree.h" #include "gimple.h" #include "gimple-iterator.h" #include "gimple-walk.h" #include "diagnostic-event-id.h" #include "analyzer/common.h" #include "analyzer/analyzer-logging.h" #include "json.h" #include "analyzer/sm.h" #include "analyzer/pending-diagnostic.h" int plugin_is_GPL_compatible; #if ENABLE_ANALYZER namespace ana { static bool type_based_on_pyobject_p (tree type) { /* Ideally we'd also check for "subclasses" here by iterating up the first field of each struct. */ if (TREE_CODE (type) != RECORD_TYPE) return false; tree name = TYPE_IDENTIFIER (type); if (!name) return false; return id_equal (name, "PyObject"); } /* An experimental state machine, for tracking whether the GIL is held, as global state.. */ class gil_state_machine : public state_machine { public: gil_state_machine (logger *logger); bool inherited_state_p () const final override { return false; } bool on_stmt (sm_context &sm_ctxt, const supernode *node, const gimple *stmt) const final override; bool can_purge_p (state_t s) const final override; void check_for_pyobject_usage_without_gil (sm_context &sm_ctxt, const supernode *node, const gimple *stmt, tree op) const; private: void check_for_pyobject_in_call (sm_context &sm_ctxt, const supernode *node, const gcall &call, tree callee_fndecl) const; public: /* These states are "global", rather than per-expression. */ /* State for when we've released the GIL. */ state_t m_released_gil; /* Stop state. */ state_t m_stop; }; /* Subclass for diagnostics involving the GIL. */ class gil_diagnostic : public pending_diagnostic { public: /* There isn't a warning ID for us to use. */ int get_controlling_option () const final override { return 0; } location_t fixup_location (location_t loc, bool) const final override { /* Ideally we'd check for specific macros here, and only resolve certain macros. */ if (linemap_location_from_macro_expansion_p (line_table, loc)) loc = linemap_resolve_location (line_table, loc, LRK_MACRO_EXPANSION_POINT, NULL); return loc; } bool describe_state_change (pretty_printer &pp, const evdesc::state_change &change) final override { if (change.is_global_p () && change.m_new_state == m_sm.m_released_gil) { pp_string (&pp, "releasing the GIL here"); return true; } if (change.is_global_p () && change.m_new_state == m_sm.get_start_state ()) { pp_string (&pp, "acquiring the GIL here"); return true; } return false; } diagnostic_event::meaning get_meaning_for_state_change (const evdesc::state_change &change) const final override { if (change.is_global_p ()) { if (change.m_new_state == m_sm.m_released_gil) return diagnostic_event::meaning (diagnostic_event::VERB_release, diagnostic_event::NOUN_lock); else if (change.m_new_state == m_sm.get_start_state ()) return diagnostic_event::meaning (diagnostic_event::VERB_acquire, diagnostic_event::NOUN_lock); } return diagnostic_event::meaning (); } protected: gil_diagnostic (const gil_state_machine &sm) : m_sm (sm) { } private: const gil_state_machine &m_sm; }; class double_save_thread : public gil_diagnostic { public: double_save_thread (const gil_state_machine &sm, const gcall &call) : gil_diagnostic (sm), m_call (call) {} const char *get_kind () const final override { return "double_save_thread"; } bool subclass_equal_p (const pending_diagnostic &base_other) const override { const double_save_thread &sub_other = (const double_save_thread &)base_other; return &m_call == &sub_other.m_call; } bool emit (diagnostic_emission_context &ctxt) final override { return ctxt.warn ("nested usage of %qs", "Py_BEGIN_ALLOW_THREADS"); } bool describe_final_event (pretty_printer &pp, const evdesc::final_event &ev) final override { pp_printf (&pp, "nested usage of %qs here", "Py_BEGIN_ALLOW_THREADS"); return true; } private: const gcall &m_call; }; class fncall_without_gil : public gil_diagnostic { public: fncall_without_gil (const gil_state_machine &sm, const gcall &call, tree callee_fndecl, unsigned arg_idx) : gil_diagnostic (sm), m_call (call), m_callee_fndecl (callee_fndecl), m_arg_idx (arg_idx) {} const char *get_kind () const final override { return "fncall_without_gil"; } bool subclass_equal_p (const pending_diagnostic &base_other) const override { const fncall_without_gil &sub_other = (const fncall_without_gil &)base_other; return (&m_call == &sub_other.m_call && m_callee_fndecl == sub_other.m_callee_fndecl && m_arg_idx == sub_other.m_arg_idx); } bool emit (diagnostic_emission_context &ctxt) final override { if (m_callee_fndecl) return ctxt.warn ("use of PyObject as argument %i of %qE" " without the GIL", m_arg_idx + 1, m_callee_fndecl); else return ctxt.warn ("use of PyObject as argument %i of call" " without the GIL", m_arg_idx + 1, m_callee_fndecl); } bool describe_final_event (pretty_printer &pp, const evdesc::final_event &ev) final override { if (m_callee_fndecl) pp_printf (&pp, "use of PyObject as argument %i of %qE here without the GIL", m_arg_idx + 1, m_callee_fndecl); else pp_printf (&pp, "use of PyObject as argument %i of call here without the GIL", m_arg_idx + 1, m_callee_fndecl); return true; } private: const gcall &m_call; tree m_callee_fndecl; unsigned m_arg_idx; }; class pyobject_usage_without_gil : public gil_diagnostic { public: pyobject_usage_without_gil (const gil_state_machine &sm, tree expr) : gil_diagnostic (sm), m_expr (expr) {} const char *get_kind () const final override { return "pyobject_usage_without_gil"; } bool subclass_equal_p (const pending_diagnostic &base_other) const override { return same_tree_p (m_expr, ((const pyobject_usage_without_gil&)base_other).m_expr); } bool emit (diagnostic_emission_context &ctxt) final override { return ctxt.warn ("use of PyObject %qE without the GIL", m_expr); } bool describe_final_event (pretty_printer &pp, const evdesc::final_event &ev) final override { pp_printf (&pp, "PyObject %qE used here without the GIL", m_expr); return true; } private: tree m_expr; }; /* gil_state_machine's ctor. */ gil_state_machine::gil_state_machine (logger *logger) : state_machine ("gil", logger) { m_released_gil = add_state ("released_gil"); m_stop = add_state ("stop"); } struct cb_data { cb_data (const gil_state_machine &sm, sm_context &sm_ctxt, const supernode *snode, const gimple *stmt) : m_sm (sm), m_sm_ctxt (sm_ctxt), m_snode (snode), m_stmt (stmt) { } const gil_state_machine &m_sm; sm_context &m_sm_ctxt; const supernode *m_snode; const gimple *m_stmt; }; static bool check_for_pyobject (gimple *, tree op, tree, void *data) { cb_data *d = (cb_data *)data; d->m_sm.check_for_pyobject_usage_without_gil (d->m_sm_ctxt, d->m_snode, d->m_stmt, op); return true; } /* Assuming that the GIL has been released, complain about any PyObject * arguments passed to CALL. */ void gil_state_machine::check_for_pyobject_in_call (sm_context &sm_ctxt, const supernode *node, const gcall &call, tree callee_fndecl) const { for (unsigned i = 0; i < gimple_call_num_args (&call); i++) { tree arg = gimple_call_arg (&call, i); if (TREE_CODE (TREE_TYPE (arg)) != POINTER_TYPE) continue; tree type = TREE_TYPE (TREE_TYPE (arg)); if (type_based_on_pyobject_p (type)) { sm_ctxt.warn (node, &call, NULL_TREE, std::make_unique (*this, call, callee_fndecl, i)); sm_ctxt.set_global_state (m_stop); } } } /* Implementation of state_machine::on_stmt vfunc for gil_state_machine. */ bool gil_state_machine::on_stmt (sm_context &sm_ctxt, const supernode *node, const gimple *stmt) const { const state_t global_state = sm_ctxt.get_global_state (); if (const gcall *call_stmt = dyn_cast (stmt)) { const gcall &call = *call_stmt; if (tree callee_fndecl = sm_ctxt.get_fndecl_for_call (call)) { if (is_named_call_p (callee_fndecl, "PyEval_SaveThread", call, 0)) { if (0) inform (input_location, "found call to %qs", "PyEval_SaveThread"); if (global_state == m_released_gil) { sm_ctxt.warn (node, stmt, NULL_TREE, std::make_unique (*this, call)); sm_ctxt.set_global_state (m_stop); } else sm_ctxt.set_global_state (m_released_gil); return true; } else if (is_named_call_p (callee_fndecl, "PyEval_RestoreThread", call, 1)) { if (0) inform (input_location, "found call to %qs", "PyEval_SaveThread"); if (global_state == m_released_gil) sm_ctxt.set_global_state (m_start); return true; } else if (global_state == m_released_gil) { /* Find PyObject * args of calls to fns with unknown bodies. */ if (!fndecl_has_gimple_body_p (callee_fndecl)) check_for_pyobject_in_call (sm_ctxt, node, call, callee_fndecl); } } else if (global_state == m_released_gil) check_for_pyobject_in_call (sm_ctxt, node, call, NULL); } else if (global_state == m_released_gil) { /* Walk the stmt, finding uses of PyObject (or "subclasses"). */ cb_data d (*this, sm_ctxt, node, stmt); walk_stmt_load_store_addr_ops (const_cast (stmt), &d, check_for_pyobject, check_for_pyobject, check_for_pyobject); } return false; } bool gil_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const { return true; } void gil_state_machine::check_for_pyobject_usage_without_gil (sm_context &sm_ctxt, const supernode *node, const gimple *stmt, tree op) const { tree type = TREE_TYPE (op); if (type_based_on_pyobject_p (type)) { sm_ctxt.warn (node, stmt, NULL_TREE, std::make_unique (*this, op)); sm_ctxt.set_global_state (m_stop); } } /* Callback handler for the PLUGIN_ANALYZER_INIT event. */ static void gil_analyzer_init_cb (void *gcc_data, void */*user_data*/) { ana::plugin_analyzer_init_iface *iface = (ana::plugin_analyzer_init_iface *)gcc_data; LOG_SCOPE (iface->get_logger ()); if (0) inform (input_location, "got here: gil_analyzer_init_cb"); iface->register_state_machine (std::make_unique (iface->get_logger ())); } } // namespace ana #endif /* #if ENABLE_ANALYZER */ int plugin_init (struct plugin_name_args *plugin_info, struct plugin_gcc_version *version) { #if ENABLE_ANALYZER const char *plugin_name = plugin_info->base_name; if (0) inform (input_location, "got here; %qs", plugin_name); register_callback (plugin_info->base_name, PLUGIN_ANALYZER_INIT, ana::gil_analyzer_init_cb, NULL); /* void *user_data */ #else sorry_no_analyzer (); #endif return 0; }