diff options
author | Tim Lange <mail@tim-lange.me> | 2022-08-12 10:27:16 +0200 |
---|---|---|
committer | Tim Lange <mail@tim-lange.me> | 2022-08-12 10:46:12 +0200 |
commit | 7e3b45befdbbf1a1f9ff728fa2bac31b4756907c (patch) | |
tree | 6312f12e99ee27d3827ce85d61bc2036dca97c92 /gcc/analyzer/region-model.cc | |
parent | 2b75b3b6a4ddc0d65a84a0cc4b00c47ae70e52c0 (diff) | |
download | gcc-7e3b45befdbbf1a1f9ff728fa2bac31b4756907c.zip gcc-7e3b45befdbbf1a1f9ff728fa2bac31b4756907c.tar.gz gcc-7e3b45befdbbf1a1f9ff728fa2bac31b4756907c.tar.bz2 |
analyzer: out-of-bounds checker [PR106000]
This patch adds an experimental out-of-bounds checker to the analyzer.
The checker was tested on coreutils, curl, httpd and openssh. It is mostly
accurate but does produce false-positives on yacc-generated files and
sometimes when the analyzer misses an invariant. These cases will be
documented in bugzilla.
Regression-tested on Linux x86-64; additionally ran the analyzer tests with
the -m32 option.
2022-08-11 Tim Lange <mail@tim-lange.me>
gcc/analyzer/ChangeLog:
PR analyzer/106000
* analyzer.opt: Add Wanalyzer-out-of-bounds.
* region-model.cc (class out_of_bounds): Diagnostics base class
for all out-of-bounds diagnostics.
(class past_the_end): Base class derived from out_of_bounds for
the buffer_overflow and buffer_overread diagnostics.
(class buffer_overflow): Buffer overflow diagnostics.
(class buffer_overread): Buffer overread diagnostics.
(class buffer_underflow): Buffer underflow diagnostics.
(class buffer_underread): Buffer underread diagnostics.
(region_model::check_region_bounds): New function to check region
bounds for out-of-bounds accesses.
(region_model::check_region_access):
Add call to check_region_bounds.
(region_model::get_representative_tree): New function that accepts
a region instead of an svalue.
* region-model.h (class region_model):
Add region_model::check_region_bounds.
* region.cc (region::symbolic_p): New predicate.
(offset_region::get_byte_size_sval): Only return the remaining
byte size on offset_regions.
* region.h: Add region::symbolic_p.
* store.cc (byte_range::intersects_p):
Add new function equivalent to bit_range::intersects_p.
(byte_range::exceeds_p): New function.
(byte_range::falls_short_of_p): New function.
* store.h (struct byte_range): Add byte_range::intersects_p,
byte_range::exceeds_p and byte_range::falls_short_of_p.
gcc/ChangeLog:
PR analyzer/106000
* doc/invoke.texi: Add Wanalyzer-out-of-bounds.
gcc/testsuite/ChangeLog:
PR analyzer/106000
* g++.dg/analyzer/pr100244.C: Disable out-of-bounds warning.
* gcc.dg/analyzer/allocation-size-3.c:
Disable out-of-bounds warning.
* gcc.dg/analyzer/memcpy-2.c: Disable out-of-bounds warning.
* gcc.dg/analyzer/pr101962.c: Add dg-warning.
* gcc.dg/analyzer/pr96764.c: Disable out-of-bounds warning.
* gcc.dg/analyzer/pr97029.c:
Add dummy buffer to prevent an out-of-bounds warning.
* gcc.dg/analyzer/realloc-5.c: Add dg-warning.
* gcc.dg/analyzer/test-setjmp.h:
Add dummy buffer to prevent an out-of-bounds warning.
* gcc.dg/analyzer/zlib-3.c: Add dg-bogus.
* g++.dg/analyzer/out-of-bounds-placement-new.C: New test.
* gcc.dg/analyzer/out-of-bounds-1.c: New test.
* gcc.dg/analyzer/out-of-bounds-2.c: New test.
* gcc.dg/analyzer/out-of-bounds-3.c: New test.
* gcc.dg/analyzer/out-of-bounds-container_of.c: New test.
* gcc.dg/analyzer/out-of-bounds-coreutils.c: New test.
* gcc.dg/analyzer/out-of-bounds-curl.c: New test.
Diffstat (limited to 'gcc/analyzer/region-model.cc')
-rw-r--r-- | gcc/analyzer/region-model.cc | 422 |
1 files changed, 422 insertions, 0 deletions
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 8393c7d..7e70776 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -1268,6 +1268,414 @@ region_model::on_stmt_pre (const gimple *stmt, } } +/* Abstract base class for all out-of-bounds warnings. */ + +class out_of_bounds : public pending_diagnostic_subclass<out_of_bounds> +{ +public: + out_of_bounds (const region *reg, tree diag_arg, + byte_range out_of_bounds_range) + : m_reg (reg), m_diag_arg (diag_arg), + m_out_of_bounds_range (out_of_bounds_range) + {} + + const char *get_kind () const final override + { + return "out_of_bounds_diagnostic"; + } + + bool operator== (const out_of_bounds &other) const + { + return m_reg == other.m_reg + && m_out_of_bounds_range == other.m_out_of_bounds_range + && pending_diagnostic::same_tree_p (m_diag_arg, other.m_diag_arg); + } + + int get_controlling_option () const final override + { + return OPT_Wanalyzer_out_of_bounds; + } + + void mark_interesting_stuff (interesting_t *interest) final override + { + interest->add_region_creation (m_reg); + } + +protected: + const region *m_reg; + tree m_diag_arg; + byte_range m_out_of_bounds_range; +}; + +/* Abstract subclass to complain about out-of-bounds + past the end of the buffer. 
*/ + +class past_the_end : public out_of_bounds +{ +public: + past_the_end (const region *reg, tree diag_arg, byte_range range, + tree byte_bound) + : out_of_bounds (reg, diag_arg, range), m_byte_bound (byte_bound) + {} + + bool operator== (const past_the_end &other) const + { + return out_of_bounds::operator== (other) + && pending_diagnostic::same_tree_p (m_byte_bound, + other.m_byte_bound); + } + + label_text + describe_region_creation_event (const evdesc::region_creation &ev) final + override + { + if (m_byte_bound && TREE_CODE (m_byte_bound) == INTEGER_CST) + return ev.formatted_print ("capacity is %E bytes", m_byte_bound); + + return label_text (); + } + +protected: + tree m_byte_bound; +}; + +/* Concrete subclass to complain about buffer overflows. */ + +class buffer_overflow : public past_the_end +{ +public: + buffer_overflow (const region *reg, tree diag_arg, + byte_range range, tree byte_bound) + : past_the_end (reg, diag_arg, range, byte_bound) + {} + + bool emit (rich_location *rich_loc) final override + { + diagnostic_metadata m; + bool warned; + switch (m_reg->get_memory_space ()) + { + default: + m.add_cwe (787); + warned = warning_meta (rich_loc, m, get_controlling_option (), + "buffer overflow"); + break; + case MEMSPACE_STACK: + m.add_cwe (121); + warned = warning_meta (rich_loc, m, get_controlling_option (), + "stack-based buffer overflow"); + break; + case MEMSPACE_HEAP: + m.add_cwe (122); + warned = warning_meta (rich_loc, m, get_controlling_option (), + "heap-based buffer overflow"); + break; + } + + if (warned) + { + char num_bytes_past_buf[WIDE_INT_PRINT_BUFFER_SIZE]; + print_dec (m_out_of_bounds_range.m_size_in_bytes, + num_bytes_past_buf, UNSIGNED); + if (m_diag_arg) + inform (rich_loc->get_loc (), "write is %s bytes past the end" + " of %qE", num_bytes_past_buf, + m_diag_arg); + else + inform (rich_loc->get_loc (), "write is %s bytes past the end" + " of the region", + num_bytes_past_buf); + } + + return warned; + } + + label_text 
describe_final_event (const evdesc::final_event &ev) + final override + { + byte_size_t start = m_out_of_bounds_range.get_start_byte_offset (); + byte_size_t end = m_out_of_bounds_range.get_last_byte_offset (); + char start_buf[WIDE_INT_PRINT_BUFFER_SIZE]; + print_dec (start, start_buf, SIGNED); + char end_buf[WIDE_INT_PRINT_BUFFER_SIZE]; + print_dec (end, end_buf, SIGNED); + + if (start == end) + { + if (m_diag_arg) + return ev.formatted_print ("out-of-bounds write at byte %s but %qE" + " ends at byte %E", start_buf, m_diag_arg, + m_byte_bound); + return ev.formatted_print ("out-of-bounds write at byte %s but region" + " ends at byte %E", start_buf, + m_byte_bound); + } + else + { + if (m_diag_arg) + return ev.formatted_print ("out-of-bounds write from byte %s till" + " byte %s but %qE ends at byte %E", + start_buf, end_buf, m_diag_arg, + m_byte_bound); + return ev.formatted_print ("out-of-bounds write from byte %s till" + " byte %s but region ends at byte %E", + start_buf, end_buf, m_byte_bound); + } + } +}; + +/* Concrete subclass to complain about buffer overreads. 
*/ + +class buffer_overread : public past_the_end +{ +public: + buffer_overread (const region *reg, tree diag_arg, + byte_range range, tree byte_bound) + : past_the_end (reg, diag_arg, range, byte_bound) + {} + + bool emit (rich_location *rich_loc) final override + { + diagnostic_metadata m; + m.add_cwe (126); + bool warned = warning_meta (rich_loc, m, get_controlling_option (), + "buffer overread"); + + if (warned) + { + char num_bytes_past_buf[WIDE_INT_PRINT_BUFFER_SIZE]; + print_dec (m_out_of_bounds_range.m_size_in_bytes, + num_bytes_past_buf, UNSIGNED); + if (m_diag_arg) + inform (rich_loc->get_loc (), "read is %s bytes past the end" + " of %qE", num_bytes_past_buf, + m_diag_arg); + else + inform (rich_loc->get_loc (), "read is %s bytes past the end" + " of the region", + num_bytes_past_buf); + } + + return warned; + } + + label_text describe_final_event (const evdesc::final_event &ev) + final override + { + byte_size_t start = m_out_of_bounds_range.get_start_byte_offset (); + byte_size_t end = m_out_of_bounds_range.get_last_byte_offset (); + char start_buf[WIDE_INT_PRINT_BUFFER_SIZE]; + print_dec (start, start_buf, SIGNED); + char end_buf[WIDE_INT_PRINT_BUFFER_SIZE]; + print_dec (end, end_buf, SIGNED); + + if (start == end) + { + if (m_diag_arg) + return ev.formatted_print ("out-of-bounds read at byte %s but %qE" + " ends at byte %E", start_buf, m_diag_arg, + m_byte_bound); + return ev.formatted_print ("out-of-bounds read at byte %s but region" + " ends at byte %E", start_buf, + m_byte_bound); + } + else + { + if (m_diag_arg) + return ev.formatted_print ("out-of-bounds read from byte %s till" + " byte %s but %qE ends at byte %E", + start_buf, end_buf, m_diag_arg, + m_byte_bound); + return ev.formatted_print ("out-of-bounds read from byte %s till" + " byte %s but region ends at byte %E", + start_buf, end_buf, m_byte_bound); + } + } +}; + +/* Concrete subclass to complain about buffer underflows. 
*/ + +class buffer_underflow : public out_of_bounds +{ +public: + buffer_underflow (const region *reg, tree diag_arg, byte_range range) + : out_of_bounds (reg, diag_arg, range) + {} + + bool emit (rich_location *rich_loc) final override + { + diagnostic_metadata m; + m.add_cwe (124); + return warning_meta (rich_loc, m, get_controlling_option (), + "buffer underflow"); + } + + label_text describe_final_event (const evdesc::final_event &ev) + final override + { + byte_size_t start = m_out_of_bounds_range.get_start_byte_offset (); + byte_size_t end = m_out_of_bounds_range.get_last_byte_offset (); + char start_buf[WIDE_INT_PRINT_BUFFER_SIZE]; + print_dec (start, start_buf, SIGNED); + char end_buf[WIDE_INT_PRINT_BUFFER_SIZE]; + print_dec (end, end_buf, SIGNED); + + if (start == end) + { + if (m_diag_arg) + return ev.formatted_print ("out-of-bounds write at byte %s but %qE" + " starts at byte 0", start_buf, + m_diag_arg); + return ev.formatted_print ("out-of-bounds write at byte %s but region" + " starts at byte 0", start_buf); + } + else + { + if (m_diag_arg) + return ev.formatted_print ("out-of-bounds write from byte %s till" + " byte %s but %qE starts at byte 0", + start_buf, end_buf, m_diag_arg); + return ev.formatted_print ("out-of-bounds write from byte %s till" + " byte %s but region starts at byte 0", + start_buf, end_buf); + } + } +}; + +/* Concrete subclass to complain about buffer underreads. 
*/ + +class buffer_underread : public out_of_bounds +{ +public: + buffer_underread (const region *reg, tree diag_arg, byte_range range) + : out_of_bounds (reg, diag_arg, range) + {} + + bool emit (rich_location *rich_loc) final override + { + diagnostic_metadata m; + m.add_cwe (127); + return warning_meta (rich_loc, m, get_controlling_option (), + "buffer underread"); + } + + label_text describe_final_event (const evdesc::final_event &ev) + final override + { + byte_size_t start = m_out_of_bounds_range.get_start_byte_offset (); + byte_size_t end = m_out_of_bounds_range.get_last_byte_offset (); + char start_buf[WIDE_INT_PRINT_BUFFER_SIZE]; + print_dec (start, start_buf, SIGNED); + char end_buf[WIDE_INT_PRINT_BUFFER_SIZE]; + print_dec (end, end_buf, SIGNED); + + if (start == end) + { + if (m_diag_arg) + return ev.formatted_print ("out-of-bounds read at byte %s but %qE" + " starts at byte 0", start_buf, + m_diag_arg); + return ev.formatted_print ("out-of-bounds read at byte %s but region" + " starts at byte 0", start_buf); + } + else + { + if (m_diag_arg) + return ev.formatted_print ("out-of-bounds read from byte %s till" + " byte %s but %qE starts at byte 0", + start_buf, end_buf, m_diag_arg); + return ev.formatted_print ("out-of-bounds read from byte %s till" + " byte %s but region starts at byte 0", + start_buf, end_buf); + } + } +}; + +/* May complain when the access on REG is out-of-bounds. */ + +void region_model::check_region_bounds (const region *reg, + enum access_direction dir, + region_model_context *ctxt) const +{ + gcc_assert (ctxt); + + region_offset reg_offset = reg->get_offset (); + const region *base_reg = reg_offset.get_base_region (); + + /* Bail out on symbolic offsets or symbolic regions. + (e.g. because the analyzer did not see previous offsets on the latter, + it might think that a negative access is before the buffer). 
*/ + if (reg_offset.symbolic_p () || base_reg->symbolic_p ()) + return; + byte_offset_t offset_unsigned + = reg_offset.get_bit_offset () >> LOG2_BITS_PER_UNIT; + /* The constant offset from a pointer is represented internally as a sizetype + but should be interpreted as a signed value here. The statement below + converts the offset to a signed integer with the same precision the + sizetype has on the target system. + + For example, this is needed for out-of-bounds-3.c test1 to pass when + compiled with a 64-bit gcc build targeting 32-bit systems. */ + byte_offset_t offset + = offset_unsigned.to_shwi (TYPE_PRECISION (size_type_node)); + + /* Find out how many bytes were accessed. */ + const svalue *num_bytes_sval = reg->get_byte_size_sval (m_mgr); + tree num_bytes_tree = num_bytes_sval->maybe_get_constant (); + if (!num_bytes_tree || TREE_CODE (num_bytes_tree) != INTEGER_CST) + /* If we do not know how many bytes were read/written, + assume that at least one byte was read/written. */ + num_bytes_tree = integer_one_node; + + byte_range out (0, 0); + /* NUM_BYTES_TREE should always be interpreted as unsigned. */ + byte_range read_bytes (offset, wi::to_offset (num_bytes_tree).to_uhwi ()); + /* If read_bytes has a subset < 0, we do have an underflow. */ + if (read_bytes.falls_short_of_p (0, &out)) + { + tree diag_arg = get_representative_tree (reg->get_base_region ()); + switch (dir) + { + default: + gcc_unreachable (); + break; + case DIR_READ: + ctxt->warn (new buffer_underread (reg, diag_arg, out)); + break; + case DIR_WRITE: + ctxt->warn (new buffer_underflow (reg, diag_arg, out)); + break; + } + } + + const svalue *capacity = get_capacity (base_reg); + tree cst_capacity_tree = capacity->maybe_get_constant (); + if (!cst_capacity_tree || TREE_CODE (cst_capacity_tree) != INTEGER_CST) + return; + + byte_range buffer (0, wi::to_offset (cst_capacity_tree)); + /* If READ_BYTES exceeds BUFFER, we do have an overflow. 
*/ + if (read_bytes.exceeds_p (buffer, &out)) + { + tree byte_bound = wide_int_to_tree (size_type_node, + buffer.get_next_byte_offset ()); + tree diag_arg = get_representative_tree (reg->get_base_region ()); + + switch (dir) + { + default: + gcc_unreachable (); + break; + case DIR_READ: + ctxt->warn (new buffer_overread (reg, diag_arg, out, byte_bound)); + break; + case DIR_WRITE: + ctxt->warn (new buffer_overflow (reg, diag_arg, out, byte_bound)); + break; + } + } +} + /* Ensure that all arguments at the call described by CD are checked for poisoned values, by calling get_rvalue on each argument. */ @@ -2825,6 +3233,7 @@ region_model::check_region_access (const region *reg, return; check_region_for_taint (reg, dir, ctxt); + check_region_bounds (reg, dir, ctxt); switch (dir) { @@ -3820,6 +4229,19 @@ region_model::get_representative_tree (const svalue *sval) const return fixup_tree_for_diagnostic (expr); } +tree +region_model::get_representative_tree (const region *reg) const +{ + svalue_set visited; + tree expr = get_representative_path_var (reg, &visited).m_tree; + + /* Strip off any top-level cast. */ + if (expr && TREE_CODE (expr) == NOP_EXPR) + expr = TREE_OPERAND (expr, 0); + + return fixup_tree_for_diagnostic (expr); +} + /* Implementation of region_model::get_representative_path_var. Attempt to return a path_var that represents REG, or return |