aboutsummaryrefslogtreecommitdiff
path: root/gcc/c
diff options
context:
space:
mode:
authorDavid Malcolm <dmalcolm@redhat.com>2022-11-15 13:53:42 -0500
committerDavid Malcolm <dmalcolm@redhat.com>2022-11-15 13:53:42 -0500
commitd8aba860b34203621586df8c5a6756b18c2a0c32 (patch)
tree51d723dac265a7f94898c0589103fd83c4970a30 /gcc/c
parent46c3d9c8e8f6becdb73bac8bcc2f0ba12d6b1d9c (diff)
downloadgcc-d8aba860b34203621586df8c5a6756b18c2a0c32.zip
gcc-d8aba860b34203621586df8c5a6756b18c2a0c32.tar.gz
gcc-d8aba860b34203621586df8c5a6756b18c2a0c32.tar.bz2
c, analyzer: support named constants in analyzer [PR106302]
The analyzer's file-descriptor state machine tracks the access mode of opened files, so that it can emit -Wanalyzer-fd-access-mode-mismatch. To do this, its symbolic execution needs to "know" the values of the constants "O_RDONLY", "O_WRONLY", and "O_ACCMODE". Currently analyzer/sm-fd.cc simply uses these values directly from the build-time header files, but these are the values on the host, not those from the target, which could be different (PR analyzer/106302). In an earlier discussion of this issue: https://gcc.gnu.org/pipermail/gcc/2022-June/238954.html we talked about adding a target hook for this. However, I've also been experimenting with extending the fd state machine to track sockets (PR analyzer/106140). For this, it's useful to "know" the values of the constants "SOCK_STREAM" and "SOCK_DGRAM". Unfortunately, these seem to have many arbitrary differences from target to target. For example: Linux/glibc generally has SOCK_STREAM == 1, SOCK_DGRAM == 2, as does AIX, but annoyingly, e.g. Linux on MIPS has them the other way around. It seems to me that as the analyzer grows more ambitious in modeling the behavior of APIs (perhaps via plugins) it's more likely that the analyzer will need to know the values of named constants, which might not even exist on the host. For example, at LPC it was suggested to me that -fanalyzer could check rules about memory management inside the Linux kernel (probably via a plugin), but doing so involves a bunch of GFP_* flags (see PR 107472). So rather than trying to capture all this knowledge in a target hook, this patch attempts to get at named constant values from the user's source code. The patch adds an interface for frontends to call into the analyzer as the translation unit finishes. The analyzer can then call back into the frontend to ask about the values of the named constants it cares about whilst the frontend's data structures are still around. 
The patch implements this for the C frontend, which looks up the names by looking for named CONST_DECLs (which handles enum values). Failing that, it attempts to look up the values of macros but only the simplest cases are supported (a non-traditional macro with a single CPP_NUMBER token). It does this by building a buffer containing the macro definition and rerunning a lexer on it. The analyzer gracefully handles the cases where named values aren't found (such as anything more complicated than described above). The patch ports the analyzer to use this mechanism for "O_RDONLY", "O_WRONLY", and "O_ACCMODE". I have successfully tested my socket patch to also use this for "SOCK_STREAM" and "SOCK_DGRAM", so the technique seems to work. gcc/ChangeLog: PR analyzer/106302 * Makefile.in (ANALYZER_OBJS): Add analyzer/analyzer-language.o. (GTFILES): Add analyzer/analyzer-language.cc. * doc/analyzer.texi: Document __analyzer_dump_named_constant. gcc/analyzer/ChangeLog: PR analyzer/106302 * analyzer-language.cc: New file. * analyzer-language.h: New file. * analyzer.h (get_stashed_constant_by_name): New decl. (log_stashed_constants): New decl. * engine.cc (impl_run_checkers): Call log_stashed_constants. * region-model-impl-calls.cc (region_model::impl_call_analyzer_dump_named_constant): New. * region-model.cc (region_model::on_stmt_pre): Handle __analyzer_dump_named_constant. * region-model.h (region_model::impl_call_analyzer_dump_named_constant): New decl. * sm-fd.cc (fd_state_machine::m_O_ACCMODE): New. (fd_state_machine::m_O_RDONLY): New. (fd_state_machine::m_O_WRONLY): New. (fd_state_machine::fd_state_machine): Initialize the new fields. (fd_state_machine::get_access_mode_from_flag): Use the new fields, rather than using the host values. gcc/c/ChangeLog: PR analyzer/106302 * c-parser.cc: Include "analyzer/analyzer-language.h" and "toplev.h". (class ana::c_translation_unit): New. (c_parser_translation_unit): Call ana::on_finish_translation_unit. 
gcc/testsuite/ChangeLog: * gcc.dg/analyzer/analyzer-decls.h (__analyzer_dump_named_constant): New decl. * gcc.dg/analyzer/fd-4.c (void): Likewise. (O_ACCMODE): Define. * gcc.dg/analyzer/fd-access-mode-enum.c: New test, based on . * gcc.dg/analyzer/fd-5.c: ...this. Rename to... * gcc.dg/analyzer/fd-access-mode-macros.c: ...this. (O_ACCMODE): Define. * gcc.dg/analyzer/fd-access-mode-target-headers.c: New test, also based on fd-5.c. (test_sm_fd_constants): New. * gcc.dg/analyzer/fd-dup-1.c (O_ACCMODE): Define. * gcc.dg/analyzer/named-constants-via-enum.c: New test. * gcc.dg/analyzer/named-constants-via-enum-and-macro.c: New test. * gcc.dg/analyzer/named-constants-via-macros-2.c: New test. * gcc.dg/analyzer/named-constants-via-macros.c: New test. Signed-off-by: David Malcolm <dmalcolm@redhat.com>
Diffstat (limited to 'gcc/c')
-rw-r--r--gcc/c/c-parser.cc90
1 files changed, 90 insertions, 0 deletions
diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 1d144bb..f3c7999 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -72,6 +72,8 @@ along with GCC; see the file COPYING3. If not see
#include "memmodel.h"
#include "c-family/known-headers.h"
#include "bitmap.h"
+#include "analyzer/analyzer-language.h"
+#include "toplev.h"
/* We need to walk over decls with incomplete struct/union/enum types
after parsing the whole translation unit.
@@ -1664,6 +1666,86 @@ static bool c_parser_objc_diagnose_bad_element_prefix
(c_parser *, struct c_declspecs *);
static location_t c_parser_parse_rtl_body (c_parser *, char *);
+#if ENABLE_ANALYZER
+
+namespace ana {
+
+/* Concrete implementation of ana::translation_unit for the C frontend; resolves identifiers to INTEGER_CSTs via CONST_DECLs or simple macros. */
+
+class c_translation_unit : public translation_unit
+{
+public:
+ /* Implementation of translation_unit::lookup_constant_by_id for use by the
+ analyzer: return the INTEGER_CST named by ID in the user's source code, or NULL_TREE if none is found. */
+ tree lookup_constant_by_id (tree id) const final override
+ {
+ /* Consider decls: accept only a CONST_DECL whose DECL_INITIAL is an INTEGER_CST. */
+ if (tree decl = lookup_name (id))
+ if (TREE_CODE (decl) == CONST_DECL)
+ if (tree value = DECL_INITIAL (decl))
+ if (TREE_CODE (value) == INTEGER_CST)
+ return value;
+
+ /* Otherwise consider macros: use ID's macro definition if consider_macro can evaluate it. */
+ cpp_hashnode *hashnode = C_CPP_HASHNODE (id);
+ if (cpp_macro_p (hashnode))
+ if (tree value = consider_macro (hashnode->value.macro))
+ return value;
+
+ return NULL_TREE;
+ }
+
+private:
+ /* Attempt to get an INTEGER_CST from MACRO, returning NULL_TREE on failure.
+ Only handle the simplest cases: where MACRO's definition is a single
+ token containing a number, by lexing the number again.
+ This will handle e.g.
+ #define NAME 42
+ and other bases but not negative numbers, parentheses or e.g.
+ #define NAME 1 << 7
+ as doing so would require a parser. */
+ tree consider_macro (cpp_macro *macro) const
+ {
+ if (macro->paramc > 0) /* Reject macros that declare parameters. */
+ return NULL_TREE;
+ if (macro->kind != cmk_macro) /* Reject any macro kind other than cmk_macro. */
+ return NULL_TREE;
+ if (macro->count != 1) /* Require exactly one token in the definition... */
+ return NULL_TREE;
+ const cpp_token &tok = macro->exp.tokens[0];
+ if (tok.type != CPP_NUMBER) /* ...and that token must be a number. */
+ return NULL_TREE;
+
+ cpp_reader *old_parse_in = parse_in; /* Saved so the global reader can be restored below. */
+ parse_in = cpp_create_reader (CLK_GNUC89, ident_hash, line_table); /* Temporary reader, used only to re-lex the number. */
+
+ pretty_printer pp; /* Holds the text to lex: the token's spelling followed by a newline. */
+ pp_string (&pp, (const char *) tok.val.str.text); /* NOTE(review): assumes the token text is NUL-terminated -- confirm. */
+ pp_newline (&pp);
+ cpp_push_buffer (parse_in,
+ (const unsigned char *) pp_formatted_text (&pp),
+ strlen (pp_formatted_text (&pp)),
+ 0);
+
+ tree value; /* NOTE(review): not initialized here; relies on c_lex_with_flags storing to it -- confirm. */
+ location_t loc;
+ unsigned char cpp_flags;
+ c_lex_with_flags (&value, &loc, &cpp_flags, 0); /* Lex one token from the temporary reader; only VALUE is used afterwards. */
+
+ cpp_destroy (parse_in); /* Dispose of the temporary reader... */
+ parse_in = old_parse_in; /* ...and reinstate the original one. */
+
+ if (value && TREE_CODE (value) == INTEGER_CST) /* Anything other than an integer constant is treated as "not found". */
+ return value;
+
+ return NULL_TREE;
+ }
+};
+
+} // namespace ana
+
+#endif /* #if ENABLE_ANALYZER */
+
/* Parse a translation unit (C90 6.7, C99 6.9, C11 6.9).
translation-unit:
@@ -1724,6 +1806,14 @@ c_parser_translation_unit (c_parser *parser)
"#pragma omp begin assumes", "#pragma omp end assumes");
current_omp_begin_assumes = 0;
}
+
+#if ENABLE_ANALYZER
+ if (flag_analyzer)
+ {
+ ana::c_translation_unit tu;
+ ana::on_finish_translation_unit (tu);
+ }
+#endif
}
/* Parse an external declaration (C90 6.7, C99 6.9, C11 6.9).