diff options
author | David Malcolm <dmalcolm@redhat.com> | 2023-08-24 10:24:40 -0400 |
---|---|---|
committer | David Malcolm <dmalcolm@redhat.com> | 2023-08-24 10:24:40 -0400 |
commit | bbdc0e0d0042ae16aa4d09ceb52c71e746d9139d (patch) | |
tree | b76244adc66be5454e78f1a4d4740d7b1ad7615c /gcc | |
parent | 2bad0eeb5573e52c4b7b51546ecffcb17f46eda3 (diff) | |
download | gcc-bbdc0e0d0042ae16aa4d09ceb52c71e746d9139d.zip gcc-bbdc0e0d0042ae16aa4d09ceb52c71e746d9139d.tar.gz gcc-bbdc0e0d0042ae16aa4d09ceb52c71e746d9139d.tar.bz2 |
analyzer: implement kf_strcat [PR105899]
gcc/analyzer/ChangeLog:
PR analyzer/105899
* call-details.cc
(call_details::check_for_null_terminated_string_arg): Split into
overloads, one taking just an arg_idx, the other a new
"include_terminator" param.
* call-details.h: Likewise.
* kf.cc (class kf_strcat): New.
(kf_strcpy::impl_call_pre): Update for change to
check_for_null_terminated_string_arg.
(register_known_functions): Register kf_strcat.
* region-model.cc
(region_model::check_for_null_terminated_string_arg): Split into
overloads, one taking just an arg_idx, the other a new
"include_terminator" param. When returning an svalue, handle
"include_terminator" being false by subtracting one.
* region-model.h
(region_model::check_for_null_terminated_string_arg): Split into
overloads, one taking just an arg_idx, the other a new
"include_terminator" param.
gcc/ChangeLog:
PR analyzer/105899
* doc/invoke.texi (Static Analyzer Options): Add "strcat" to the
list of functions known to the analyzer.
gcc/testsuite/ChangeLog:
PR analyzer/105899
* gcc.dg/analyzer/strcat-1.c: New test.
Signed-off-by: David Malcolm <dmalcolm@redhat.com>
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/analyzer/call-details.cc | 12 | ||||
-rw-r--r-- | gcc/analyzer/call-details.h | 5 | ||||
-rw-r--r-- | gcc/analyzer/kf.cc | 72 | ||||
-rw-r--r-- | gcc/analyzer/region-model.cc | 63 | ||||
-rw-r--r-- | gcc/analyzer/region-model.h | 6 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 1 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/analyzer/strcat-1.c | 136 |
7 files changed, 275 insertions, 20 deletions
diff --git a/gcc/analyzer/call-details.cc b/gcc/analyzer/call-details.cc index 8f5b28c..ce1f859 100644 --- a/gcc/analyzer/call-details.cc +++ b/gcc/analyzer/call-details.cc @@ -386,13 +386,23 @@ call_details::lookup_function_attribute (const char *attr_name) const return lookup_attribute (attr_name, TYPE_ATTRIBUTES (allocfntype)); } +void +call_details::check_for_null_terminated_string_arg (unsigned arg_idx) const +{ + check_for_null_terminated_string_arg (arg_idx, false, nullptr); +} + const svalue * call_details:: check_for_null_terminated_string_arg (unsigned arg_idx, + bool include_terminator, const svalue **out_sval) const { region_model *model = get_model (); - return model->check_for_null_terminated_string_arg (*this, arg_idx, out_sval); + return model->check_for_null_terminated_string_arg (*this, + arg_idx, + include_terminator, + out_sval); } } // namespace ana diff --git a/gcc/analyzer/call-details.h b/gcc/analyzer/call-details.h index 58b5ccd..ae528e4 100644 --- a/gcc/analyzer/call-details.h +++ b/gcc/analyzer/call-details.h @@ -72,9 +72,12 @@ public: tree lookup_function_attribute (const char *attr_name) const; + void + check_for_null_terminated_string_arg (unsigned arg_idx) const; const svalue * check_for_null_terminated_string_arg (unsigned arg_idx, - const svalue **out_sval = nullptr) const; + bool include_terminator, + const svalue **out_sval) const; private: const gcall *m_call; diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc index 3eddbe2..36d9d10 100644 --- a/gcc/analyzer/kf.cc +++ b/gcc/analyzer/kf.cc @@ -1106,6 +1106,61 @@ public: /* Currently a no-op. */ }; +/* Handler for "strcat" and "__builtin_strcat_chk". */ + +class kf_strcat : public known_function +{ +public: + kf_strcat (unsigned int num_args) : m_num_args (num_args) {} + bool matches_call_types_p (const call_details &cd) const final override + { + return (cd.num_args () == m_num_args + && cd.arg_is_pointer_p (0) + && cd.arg_is_pointer_p (1)); + } + + void impl_call_pre (const call_details &cd) const final override + { + region_model *model = cd.get_model (); + region_model_manager *mgr = cd.get_manager (); + + const svalue *dest_sval = cd.get_arg_svalue (0); + const region *dest_reg = model->deref_rvalue (dest_sval, cd.get_arg_tree (0), + cd.get_ctxt ()); + + const svalue *dst_strlen_sval + = cd.check_for_null_terminated_string_arg (0, false, nullptr); + if (!dst_strlen_sval) + { + if (cd.get_ctxt ()) + cd.get_ctxt ()->terminate_path (); + return; + } + + const svalue *bytes_to_copy; + const svalue *num_src_bytes_read_sval + = cd.check_for_null_terminated_string_arg (1, true, &bytes_to_copy); + if (!num_src_bytes_read_sval) + { + if (cd.get_ctxt ()) + cd.get_ctxt ()->terminate_path (); + return; + } + + cd.maybe_set_lhs (dest_sval); + + const region *offset_reg + = mgr->get_offset_region (dest_reg, NULL_TREE, dst_strlen_sval); + model->write_bytes (offset_reg, + num_src_bytes_read_sval, + bytes_to_copy, + cd.get_ctxt ()); + } + +private: + unsigned int m_num_args; +}; + /* Handler for "strcpy" and "__builtin_strcpy_chk". */ class kf_strcpy : public known_function @@ -1139,7 +1194,7 @@ kf_strcpy::impl_call_pre (const call_details &cd) const const svalue *bytes_to_copy; if (const svalue *num_bytes_read_sval - = cd.check_for_null_terminated_string_arg (1, &bytes_to_copy)) + = cd.check_for_null_terminated_string_arg (1, true, &bytes_to_copy)) { model->write_bytes (dest_reg, num_bytes_read_sval, bytes_to_copy, ctxt); } @@ -1188,16 +1243,10 @@ public: } void impl_call_pre (const call_details &cd) const final override { - if (const svalue *bytes_read = cd.check_for_null_terminated_string_arg (0)) - if (bytes_read->get_kind () != SK_UNKNOWN) + if (const svalue *strlen_sval + = cd.check_for_null_terminated_string_arg (0, false, nullptr)) + if (strlen_sval->get_kind () != SK_UNKNOWN) { - region_model_manager *mgr = cd.get_manager (); - /* strlen is (bytes_read - 1). */ - const svalue *one = mgr->get_or_create_int_cst (size_type_node, 1); - const svalue *strlen_sval = mgr->get_or_create_binop (size_type_node, - MINUS_EXPR, - bytes_read, - one); cd.maybe_set_lhs (strlen_sval); return; } @@ -1415,6 +1464,8 @@ register_known_functions (known_function_manager &kfm) kfm.add (BUILT_IN_SPRINTF, make_unique<kf_sprintf> ()); kfm.add (BUILT_IN_STACK_RESTORE, make_unique<kf_stack_restore> ()); kfm.add (BUILT_IN_STACK_SAVE, make_unique<kf_stack_save> ()); + kfm.add (BUILT_IN_STRCAT, make_unique<kf_strcat> (2)); + kfm.add (BUILT_IN_STRCAT_CHK, make_unique<kf_strcat> (3)); kfm.add (BUILT_IN_STRCHR, make_unique<kf_strchr> ()); kfm.add (BUILT_IN_STRCPY, make_unique<kf_strcpy> (2)); kfm.add (BUILT_IN_STRCPY_CHK, make_unique<kf_strcpy> (3)); @@ -1429,6 +1480,7 @@ register_known_functions (known_function_manager &kfm) /* Known builtins and C standard library functions. */ { kfm.add ("memset", make_unique<kf_memset> ()); + kfm.add ("strcat", make_unique<kf_strcat> (2)); kfm.add ("strdup", make_unique<kf_strdup> ()); kfm.add ("strndup", make_unique<kf_strndup> ()); } diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 025b555..02c073c 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -3679,9 +3679,41 @@ region_model::scan_for_null_terminator (const region *reg, - the buffer pointed to has any uninitalized bytes before any 0-terminator - any of the reads aren't within the bounds of the underlying base region - Otherwise, return a svalue for the number of bytes read (strlen + 1), - and, if OUT_SVAL is non-NULL, write to *OUT_SVAL with an svalue - representing the content of the buffer up to and including the terminator. + Otherwise, return a svalue for strlen of the buffer (*not* including + the null terminator). + + TODO: we should also complain if: + - the pointer is NULL (or could be). */ + +void +region_model::check_for_null_terminated_string_arg (const call_details &cd, + unsigned arg_idx) +{ + check_for_null_terminated_string_arg (cd, + arg_idx, + false, /* include_terminator */ + nullptr); // out_sval +} + + +/* Check that argument ARG_IDX (0-based) to the call described by CD + is a pointer to a valid null-terminated string. + + Simulate scanning through the buffer, reading until we find a 0 byte + (equivalent to calling strlen). + + Complain and return NULL if: + - the buffer pointed to isn't null-terminated + - the buffer pointed to has any uninitalized bytes before any 0-terminator + - any of the reads aren't within the bounds of the underlying base region + + Otherwise, return a svalue. This will be the number of bytes read + (including the null terminator) if INCLUDE_TERMINATOR is true, or strlen + of the buffer (not including the null terminator) if it is false. + + Also, when returning an svalue, if OUT_SVAL is non-NULL, write to + *OUT_SVAL with an svalue representing the content of the buffer up to + and including the terminator. TODO: we should also complain if: - the pointer is NULL (or could be). */ @@ -3689,6 +3721,7 @@ region_model::scan_for_null_terminator (const region *reg, const svalue * region_model::check_for_null_terminated_string_arg (const call_details &cd, unsigned arg_idx, + bool include_terminator, const svalue **out_sval) { class null_terminator_check_event : public custom_event @@ -3786,10 +3819,26 @@ region_model::check_for_null_terminated_string_arg (const call_details &cd, const region *buf_reg = deref_rvalue (arg_sval, cd.get_arg_tree (arg_idx), &my_ctxt); - return scan_for_null_terminator (buf_reg, - cd.get_arg_tree (arg_idx), - out_sval, - &my_ctxt); + if (const svalue *num_bytes_read_sval + = scan_for_null_terminator (buf_reg, + cd.get_arg_tree (arg_idx), + out_sval, + &my_ctxt)) + { + if (include_terminator) + return num_bytes_read_sval; + else + { + /* strlen is (bytes_read - 1). */ + const svalue *one = m_mgr->get_or_create_int_cst (size_type_node, 1); + return m_mgr->get_or_create_binop (size_type_node, + MINUS_EXPR, + num_bytes_read_sval, + one); + } + } + else + return nullptr; } /* Remove all bindings overlapping REG within the store. */ diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index b1c705e..4025962 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -519,10 +519,14 @@ class region_model const svalue *sval_hint, region_model_context *ctxt) const; + void + check_for_null_terminated_string_arg (const call_details &cd, + unsigned idx); const svalue * check_for_null_terminated_string_arg (const call_details &cd, unsigned idx, - const svalue **out_sval = nullptr); + bool include_terminator, + const svalue **out_sval); private: const region *get_lvalue_1 (path_var pv, region_model_context *ctxt) const; diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index bfca018..a32dabf 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -11108,6 +11108,7 @@ and of the following functions: @item @code{siglongjmp} @item @code{signal} @item @code{sigsetjmp} +@item @code{strcat} @item @code{strchr} @item @code{strlen} @end itemize diff --git a/gcc/testsuite/gcc.dg/analyzer/strcat-1.c b/gcc/testsuite/gcc.dg/analyzer/strcat-1.c new file mode 100644 index 0000000..e3b698a --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/strcat-1.c @@ -0,0 +1,136 @@ +/* See e.g. https://en.cppreference.com/w/c/string/byte/strcat */ + +#include "analyzer-decls.h" + +char *strcat (char *dest, const char *src); +#define NULL ((void *)0) + +char * +test_passthrough (char *dest, const char *src) +{ + return strcat (dest, src); +} + +char * +test_null_dest (const char *src) +{ + return strcat (NULL, src); /* { dg-warning "use of NULL where non-null expected" } */ +} + +char * +test_null_src (char *dest) +{ + return strcat (dest, NULL); /* { dg-warning "use of NULL where non-null expected" } */ +} + +char * +test_uninit_dest (const char *src) +{ + char dest[10]; + return strcat (dest, src); /* { dg-warning "use of uninitialized value 'dest\\\[0\\\]'" } */ +} + +char * +test_uninit_src (char *dest) +{ + const char src[10]; + return strcat (dest, src); /* { dg-warning "use of uninitialized value 'src\\\[0\\\]'" } */ +} + +char * +test_dest_not_terminated (char *src) +{ + char dest[3] = "foo"; + return strcat (dest, src); /* { dg-warning "stack-based buffer over-read" } */ + /* { dg-message "while looking for null terminator for argument 1 \\('&dest'\\) of 'strcat'" "" { target *-*-* } .-1 } */ +} + +char * +test_src_not_terminated (char *dest) +{ + const char src[3] = "foo"; + return strcat (dest, src); /* { dg-warning "stack-based buffer over-read" } */ + /* { dg-message "while looking for null terminator for argument 2 \\('&src'\\) of 'strcat'" "" { target *-*-* } .-1 } */ +} + +char * __attribute__((noinline)) +call_strcat (char *dest, const char *src) +{ + return strcat (dest, src); +} + +void +test_concrete_valid_static_size (void) +{ + char buf[16]; + char *p1 = __builtin_strcpy (buf, "abc"); + char *p2 = call_strcat (buf, "def"); + __analyzer_eval (p1 == buf); /* { dg-warning "TRUE" } */ + __analyzer_eval (p2 == buf); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[0] == 'a'); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[1] == 'b'); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[2] == 'c'); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[3] == 'd'); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[4] == 'e'); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[5] == 'f'); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[6] == '\0'); /* { dg-warning "TRUE" } */ + __analyzer_eval (__builtin_strlen (buf) == 6); /* { dg-warning "TRUE" } */ +} + +void +test_concrete_valid_static_size_2 (void) +{ + char buf[16]; + char *p1 = __builtin_strcpy (buf, "abc"); + char *p2 = call_strcat (buf, "def"); + char *p3 = call_strcat (buf, "ghi"); + __analyzer_eval (p1 == buf); /* { dg-warning "TRUE" } */ + __analyzer_eval (p2 == buf); /* { dg-warning "TRUE" } */ + __analyzer_eval (p3 == buf); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[0] == 'a'); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[1] == 'b'); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[2] == 'c'); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[3] == 'd'); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[4] == 'e'); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[5] == 'f'); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[6] == 'g'); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[7] == 'h'); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[8] == 'i'); /* { dg-warning "TRUE" } */ + __analyzer_eval (buf[9] == '\0'); /* { dg-warning "TRUE" } */ + __analyzer_eval (__builtin_strlen (buf) == 9); /* { dg-warning "TRUE" } */ + __analyzer_eval (__builtin_strlen (buf + 1) == 8); /* { dg-warning "TRUE" } */ + __analyzer_eval (__builtin_strlen (buf + 2) == 7); /* { dg-warning "TRUE" } */ + __analyzer_eval (__builtin_strlen (buf + 3) == 6); /* { dg-warning "TRUE" } */ + __analyzer_eval (__builtin_strlen (buf + 4) == 5); /* { dg-warning "TRUE" } */ + __analyzer_eval (__builtin_strlen (buf + 5) == 4); /* { dg-warning "TRUE" } */ + __analyzer_eval (__builtin_strlen (buf + 6) == 3); /* { dg-warning "TRUE" } */ + __analyzer_eval (__builtin_strlen (buf + 7) == 2); /* { dg-warning "TRUE" } */ + __analyzer_eval (__builtin_strlen (buf + 8) == 1); /* { dg-warning "TRUE" } */ + __analyzer_eval (__builtin_strlen (buf + 9) == 0); /* { dg-warning "TRUE" } */ +} + +char * __attribute__((noinline)) +call_strcat_invalid (char *dest, const char *src) +{ + return strcat (dest, src); /* { dg-warning "stack-based buffer overflow" } */ +} + +void +test_concrete_invalid_static_size (void) +{ + char buf[3]; + buf[0] = '\0'; + call_strcat_invalid (buf, "abc"); +} + +void +test_concrete_symbolic (const char *suffix) +{ + char buf[10]; + buf[0] = '\0'; + call_strcat (buf, suffix); +} + +/* TODO: + - "The behavior is undefined if the strings overlap." +*/ |