diff options
54 files changed, 4383 insertions, 148 deletions
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 8a7dbf7..c478ec8 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1281,6 +1281,7 @@ C_COMMON_OBJS = c-family/c-common.o c-family/c-cppbuiltin.o c-family/c-dump.o \ # Analyzer object files ANALYZER_OBJS = \ + analyzer/access-diagram.o \ analyzer/analysis-plan.o \ analyzer/analyzer.o \ analyzer/analyzer-language.o \ diff --git a/gcc/analyzer/access-diagram.cc b/gcc/analyzer/access-diagram.cc new file mode 100644 index 0000000..968ff50 --- /dev/null +++ b/gcc/analyzer/access-diagram.cc @@ -0,0 +1,2405 @@ +/* Text art visualizations within -fanalyzer. + Copyright (C) 2023 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#define INCLUDE_ALGORITHM +#define INCLUDE_MEMORY +#define INCLUDE_MAP +#define INCLUDE_SET +#include "system.h" +#include "coretypes.h" +#include "coretypes.h" +#include "tree.h" +#include "function.h" +#include "basic-block.h" +#include "gimple.h" +#include "diagnostic.h" +#include "intl.h" +#include "make-unique.h" +#include "tree-diagnostic.h" /* for default_tree_printer. 
*/ +#include "analyzer/analyzer.h" +#include "analyzer/region-model.h" +#include "analyzer/access-diagram.h" +#include "text-art/ruler.h" +#include "fold-const.h" + +#if ENABLE_ANALYZER + +/* Consider this code: + int32_t arr[10]; + arr[10] = x; + where we've emitted a buffer overflow diagnostic like this: + out-of-bounds write from byte 40 till byte 43 but 'arr' ends at byte 40 + + We want to emit a diagram that visualizes: + - the spatial relationship between the valid region to access, versus + the region that was actually accessed: does it overlap, was it touching, + close, or far away? Was it before or after in memory? What are the + relative sizes involved? + - the direction of the access (read vs write) + + The following code supports emitting diagrams similar to the following: + + # +--------------------------------+ + # |write from ‘x’ (type: ‘int32_t’)| + # +--------------------------------+ + # | + # | + # v + # +---------+-----------+-----------+ +--------------------------------+ + # | [0] | ... | [9] | | after valid range | + # +---------+-----------+-----------+ | | + # | ‘arr’ (type: ‘int32_t[10]’) | | | + # +---------------------------------+ +--------------------------------+ + # |~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~| |~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~| + # | | + # +---------+--------+ +---------+---------+ + # |capacity: 40 bytes| |overflow of 4 bytes| + # +------------------+ +-------------------+ + + where the diagram is laid out via table columns where each table column + represents either a range of bits/bytes, or is a spacing column (to highlight + the boundary between valid vs invalid accesses). The table columns can be + seen via -fanalyzer-debug-text-art. 
For example, here there are 5 table + columns ("tc0" through "tc4"): + + # +---------+-----------+-----------+---+--------------------------------+ + # | tc0 | tc1 | tc2 |tc3| tc4 | + # +---------+-----------+-----------+---+--------------------------------+ + # |bytes 0-3|bytes 4-35 |bytes 36-39| | bytes 40-43 | + # +---------+-----------+-----------+ +--------------------------------+ + # + # +--------------------------------+ + # |write from ‘x’ (type: ‘int32_t’)| + # +--------------------------------+ + # | + # | + # v + # +---------+-----------+-----------+ +--------------------------------+ + # | [0] | ... | [9] | | after valid range | + # +---------+-----------+-----------+ | | + # | ‘arr’ (type: ‘int32_t[10]’) | | | + # +---------------------------------+ +--------------------------------+ + # |~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~| |~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~| + # | | + # +---------+--------+ +---------+---------+ + # |capacity: 40 bytes| |overflow of 4 bytes| + # +------------------+ +-------------------+ + + The diagram is built up from the following: + + # +--------------------------------+ + # | ITEM FOR SVALUE/ACCESSED REGION| + # +--------------------------------+ + # | + # | DIRECTION WIDGET + # v + # +---------------------------------+ +--------------------------------+ + # | VALID REGION | | INVALID ACCESS | + # +---------------------------------+ +--------------------------------+ + # + # | VALID-VS-INVALID RULER | + + i.e. a vbox_widget containing 4 child widgets laid out vertically: + - ALIGNED CHILD WIDGET: ITEM FOR SVALUE/ACCESSED REGION + - DIRECTION WIDGET + - ALIGNED CHILD WIDGET: VALID AND INVALID ACCESSES + - VALID-VS-INVALID RULER. + + A more complicated example, given this overflow: + char buf[100]; + strcpy (buf, LOREM_IPSUM); + + 01| +---+---+---+---+---+---+----------+-----+-----+-----+-----+-----+-----+ + 02| |[0]|[1]|[2]|[3]|[4]|[5]| ... 
|[440]|[441]|[442]|[443]|[444]|[445]| + 03| +---+---+---+---+---+---+ +-----+-----+-----+-----+-----+-----+ + 04| |'L'|'o'|'r'|'e'|'m'|' '| | 'o' | 'r' | 'u' | 'm' | '.' | NUL | + 05| +---+---+---+---+---+---+----------+-----+-----+-----+-----+-----+-----+ + 06| | string literal (type: 'char[446]') | + 07| +----------------------------------------------------------------------+ + 08| | | | | | | | | | | | | | | | + 09| | | | | | | | | | | | | | | | + 10| v v v v v v v v v v v v v v v + 11| +---+---------------------+----++--------------------------------------+ + 12| |[0]| ... |[99]|| after valid range | + 13| +---+---------------------+----+| | + 14| | 'buf' (type: 'char[100]') || | + 15| +------------------------------++--------------------------------------+ + 16| |~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~||~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~| + 17| | | + 18| +---------+---------+ +----------+----------+ + 19| |capacity: 100 bytes| |overflow of 346 bytes| + 20| +-------------------+ +---------------------+ + + which is: + + 01| ALIGNED CHILD WIDGET (lines 01-07): (string_region_spatial_item)-+-----+ + 02| |[0]|[1]|[2]|[3]|[4]|[5]| ... |[440]|[441]|[442]|[443]|[444]|[445]| + 03| +---+---+---+---+---+---+ +-----+-----+-----+-----+-----+-----+ + 04| |'L'|'o'|'r'|'e'|'m'|' '| | 'o' | 'r' | 'u' | 'm' | '.' | NUL | + 05| +---+---+---+---+---+---+----------+-----+-----+-----+-----+-----+-----+ + 06| | string literal (type: 'char[446]') | + 07| +----------------------------------------------------------------------+ + 08| DIRECTION WIDGET (lines 08-10) | | | | | | | + 09| | | | | | | | | | | | | | | | + 10| v v v v v v v v v v v v v v v + 11| ALIGNED CHILD WIDGET (lines 11-15)-------------------------------------+ + 12| VALID REGION ... 
|[99]|| INVALID ACCESS | + 13| +---+---------------------+----+| | + 14| | 'buf' (type: 'char[100]') || | + 15| +------------------------------++--------------------------------------+ + 16| VALID-VS-INVALID RULER (lines 16-20): ~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~| + 17| | | + 18| +---------+---------+ +----------+----------+ + 19| |capacity: 100 bytes| |overflow of 346 bytes| + 20| +-------------------+ +---------------------+ + + We build the diagram in several phases: + - (1) we construct an access_diagram_impl widget. Within the ctor, we have + these subphases: + - (1.1) find all of the boundaries of interest + - (1.2) use the boundaries to build a bit_table_map, associating bit ranges + with table columns (e.g. "byte 0 is column 0, bytes 1-98 are column 2" etc) + - (1.3) create child widgets that share this table-based geometry + - (2) ask the widget for its size request + - (2.1) column widths and row heights for the table are computed by + access_diagram_impl::calc_req_size + - (2.2) child widgets request sizes based on these widths/heights + - (3) create a canvas of the appropriate size + - (4) paint the widget hierarchy to the canvas. */ + + +using namespace text_art; + +namespace ana { + +static styled_string +fmt_styled_string (style_manager &sm, + const char *fmt, ...) + ATTRIBUTE_GCC_DIAG(2, 3); + +static styled_string +fmt_styled_string (style_manager &sm, + const char *fmt, ...) 
+{ + va_list ap; + va_start (ap, fmt); + styled_string result + = styled_string::from_fmt_va (sm, default_tree_printer, fmt, &ap); + va_end (ap); + return result; +} + +class access_diagram_impl; +class bit_to_table_map; + +static void +pp_bit_size_t (pretty_printer *pp, bit_size_t num_bits) +{ + if (num_bits % BITS_PER_UNIT == 0) + { + byte_size_t num_bytes = num_bits / BITS_PER_UNIT; + if (num_bytes == 1) + pp_printf (pp, _("%wi byte"), num_bytes.to_uhwi ()); + else + pp_printf (pp, _("%wi bytes"), num_bytes.to_uhwi ()); + } + else + { + if (num_bits == 1) + pp_printf (pp, _("%wi bit"), num_bits.to_uhwi ()); + else + pp_printf (pp, _("%wi bits"), num_bits.to_uhwi ()); + } +} + +static styled_string +get_access_size_str (style_manager &sm, + const access_operation &op, + access_range accessed_range, + tree type) +{ + bit_size_expr num_bits; + if (accessed_range.get_size (op.m_model, &num_bits)) + { + if (type) + { + styled_string s; + + pretty_printer pp; + num_bits.print (&pp); + + if (op.m_dir == DIR_READ) + return fmt_styled_string (sm, + _("read of %qT (%s)"), + type, + pp_formatted_text (&pp)); + else + return fmt_styled_string (sm, + _("write of %qT (%s)"), + type, + pp_formatted_text (&pp)); + } + if (op.m_dir == DIR_READ) + return num_bits.get_formatted_str (sm, + _("read of %wi bit"), + _("read of %wi bits"), + _("read of %wi byte"), + _("read of %wi bytes"), + _("read of %qE bits"), + _("read of %qE bytes")); + else + return num_bits.get_formatted_str (sm, + _("write of %wi bit"), + _("write of %wi bits"), + _("write of %wi byte"), + _("write of %wi bytes"), + _("write of %qE bits"), + _("write of %qE bytes")); + } + + if (type) + { + if (op.m_dir == DIR_READ) + return fmt_styled_string (sm, _("read of %qT"), type); + else + return fmt_styled_string (sm, _("write of %qT"), type); + } + + if (op.m_dir == DIR_READ) + return styled_string (sm, _("read")); + else + return styled_string (sm, _("write")); +} + +/* Subroutine of clean_up_for_diagram. 
*/ + +static tree +strip_any_cast (tree expr) +{ + if (TREE_CODE (expr) == NOP_EXPR + || TREE_CODE (expr) == NON_LVALUE_EXPR) + expr = TREE_OPERAND (expr, 0); + return expr; +} + +/* Subroutine of clean_up_for_diagram. */ + +static tree +remove_ssa_names (tree expr) +{ + if (TREE_CODE (expr) == SSA_NAME + && SSA_NAME_VAR (expr)) + return SSA_NAME_VAR (expr); + tree t = copy_node (expr); + for (int i = 0; i < TREE_OPERAND_LENGTH (expr); i++) + TREE_OPERAND (t, i) = remove_ssa_names (TREE_OPERAND (expr, i)); + return t; +} + +/* We want to be able to print tree expressions from the analyzer, + which is in the middle end. + + We could use the front-end pretty_printer's formatting routine, + but: + (a) some have additional state in a pretty_printer subclass, so we'd + need to clone global_dc->printer + (b) the "aka" type information added by the C and C++ frontends are + too verbose when building a diagram, and there isn't a good way to ask + for a less verbose version of them. + + Hence we use default_tree_printer. + However, we want to avoid printing SSA names, and instead print the + underlying var name. + Ideally there would be a better tree printer for use by middle end + warnings, but as workaround, this function clones a tree, replacing + SSA names with the var names. */ + +tree +clean_up_for_diagram (tree expr) +{ + tree without_ssa_names = remove_ssa_names (expr); + return strip_any_cast (without_ssa_names); +} + +/* struct bit_size_expr. 
*/ + +text_art::styled_string +bit_size_expr::get_formatted_str (text_art::style_manager &sm, + const char *concrete_single_bit_fmt, + const char *concrete_plural_bits_fmt, + const char *concrete_single_byte_fmt, + const char *concrete_plural_bytes_fmt, + const char *symbolic_bits_fmt, + const char *symbolic_bytes_fmt) const +{ + if (TREE_CODE (m_num_bits) == INTEGER_CST) + { + bit_size_t concrete_num_bits = wi::to_offset (m_num_bits); + if (concrete_num_bits % BITS_PER_UNIT == 0) + { + byte_size_t concrete_num_bytes = concrete_num_bits / BITS_PER_UNIT; + if (concrete_num_bytes == 1) + return fmt_styled_string (sm, concrete_single_byte_fmt, + concrete_num_bytes.to_uhwi ()); + else + return fmt_styled_string (sm, concrete_plural_bytes_fmt, + concrete_num_bytes.to_uhwi ()); + } + else + { + if (concrete_num_bits == 1) + return fmt_styled_string (sm, concrete_single_bit_fmt, + concrete_num_bits.to_uhwi ()); + else + return fmt_styled_string (sm, concrete_plural_bits_fmt, + concrete_num_bits.to_uhwi ()); + } + } + else + { + if (tree bytes_expr = maybe_get_as_bytes ()) + return fmt_styled_string (sm, + symbolic_bytes_fmt, + clean_up_for_diagram (bytes_expr)); + return fmt_styled_string (sm, + symbolic_bits_fmt, + clean_up_for_diagram (m_num_bits)); + } +} + +void +bit_size_expr::print (pretty_printer *pp) const +{ + if (TREE_CODE (m_num_bits) == INTEGER_CST) + { + bit_size_t concrete_num_bits = wi::to_offset (m_num_bits); + pp_bit_size_t (pp, concrete_num_bits); + } + else + { + if (tree bytes_expr = maybe_get_as_bytes ()) + pp_printf (pp, _("%qE bytes"), bytes_expr); + else + pp_printf (pp, _("%qE bits"), m_num_bits); + } +} + +tree +bit_size_expr::maybe_get_as_bytes () const +{ + switch (TREE_CODE (m_num_bits)) + { + default: + break; + case INTEGER_CST: + { + const bit_size_t num_bits = wi::to_offset (m_num_bits); + if (num_bits % BITS_PER_UNIT != 0) + return NULL_TREE; + const bit_size_t num_bytes = num_bits / BITS_PER_UNIT; + return wide_int_to_tree 
(size_type_node, num_bytes); + } + break; + case PLUS_EXPR: + case MINUS_EXPR: + { + bit_size_expr op0 + = bit_size_expr (TREE_OPERAND (m_num_bits, 0)); + tree op0_as_bytes = op0.maybe_get_as_bytes (); + if (!op0_as_bytes) + return NULL_TREE; + bit_size_expr op1 + = bit_size_expr (TREE_OPERAND (m_num_bits, 1)); + tree op1_as_bytes = op1.maybe_get_as_bytes (); + if (!op1_as_bytes) + return NULL_TREE; + return fold_build2 (TREE_CODE (m_num_bits), size_type_node, + op0_as_bytes, op1_as_bytes); + } + break; + case MULT_EXPR: + { + bit_size_expr op1 + = bit_size_expr (TREE_OPERAND (m_num_bits, 1)); + if (tree op1_as_bytes = op1.maybe_get_as_bytes ()) + return fold_build2 (MULT_EXPR, size_type_node, + TREE_OPERAND (m_num_bits, 0), + op1_as_bytes); + } + break; + } + return NULL_TREE; +} + +/* struct access_range. */ + +access_range::access_range (const region *base_region, const bit_range &bits) +: m_start (region_offset::make_concrete (base_region, + bits.get_start_bit_offset ())), + m_next (region_offset::make_concrete (base_region, + bits.get_next_bit_offset ())) +{ +} + +access_range::access_range (const region *base_region, const byte_range &bytes) +: m_start (region_offset::make_concrete (base_region, + bytes.get_start_bit_offset ())), + m_next (region_offset::make_concrete (base_region, + bytes.get_next_bit_offset ())) +{ +} + +access_range::access_range (const region ®, region_model_manager *mgr) +: m_start (reg.get_offset (mgr)), + m_next (reg.get_next_offset (mgr)) +{ +} + +bool +access_range::get_size (const region_model &model, bit_size_expr *out) const +{ + tree start_expr = m_start.calc_symbolic_bit_offset (model); + if (!start_expr) + return false; + tree next_expr = m_next.calc_symbolic_bit_offset (model); + if (!next_expr) + return false; + *out = bit_size_expr (fold_build2 (MINUS_EXPR, size_type_node, + next_expr, start_expr)); + return true; +} + +bool +access_range::contains_p (const access_range &other) const +{ + return (m_start <= other.m_start + 
&& other.m_next <= m_next); +} + +bool +access_range::empty_p () const +{ + bit_range concrete_bits (0, 0); + if (!as_concrete_bit_range (&concrete_bits)) + return false; + return concrete_bits.empty_p (); +} + +void +access_range::dump_to_pp (pretty_printer *pp, bool simple) const +{ + if (m_start.concrete_p () && m_next.concrete_p ()) + { + bit_range bits (m_start.get_bit_offset (), + m_next.get_bit_offset () - m_start.get_bit_offset ()); + bits.dump_to_pp (pp); + return; + } + pp_character (pp, '['); + m_start.dump_to_pp (pp, simple); + pp_string (pp, " to "); + m_next.dump_to_pp (pp, simple); + pp_character (pp, ')'); +} + +DEBUG_FUNCTION void +access_range::dump (bool simple) const +{ + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + pp_show_color (&pp) = pp_show_color (global_dc->printer); + pp.buffer->stream = stderr; + dump_to_pp (&pp, simple); + pp_newline (&pp); + pp_flush (&pp); +} + +void +access_range::log (const char *title, logger &logger) const +{ + logger.start_log_line (); + logger.log_partial ("%s: ", title); + dump_to_pp (logger.get_printer (), true); + logger.end_log_line (); +} + +/* struct access_operation. */ + +access_range +access_operation::get_valid_bits () const +{ + const svalue *capacity_in_bytes_sval = m_model.get_capacity (m_base_region); + return access_range + (region_offset::make_concrete (m_base_region, 0), + region_offset::make_byte_offset (m_base_region, capacity_in_bytes_sval)); +} + +access_range +access_operation::get_actual_bits () const +{ + return access_range (m_reg, get_manager ()); +} + +/* If there are any bits accessed invalidly before the valid range, + return true and write their range to *OUT. + Return false if there aren't, or if there's a problem + (e.g. symbolic ranges. 
*/ + +bool +access_operation::maybe_get_invalid_before_bits (access_range *out) const +{ + access_range valid_bits (get_valid_bits ()); + access_range actual_bits (get_actual_bits ()); + + if (actual_bits.m_start >= valid_bits.m_start) + { + /* No part of accessed range is before the valid range. */ + return false; + } + else if (actual_bits.m_next > valid_bits.m_start) + { + /* Get part of accessed range that's before the valid range. */ + *out = access_range (actual_bits.m_start, valid_bits.m_start); + return true; + } + else + { + /* Accessed range is fully before valid range. */ + *out = actual_bits; + return true; + } +} + +/* If there are any bits accessed invalidly after the valid range, + return true and write their range to *OUT. + Return false if there aren't, or if there's a problem. */ + +bool +access_operation::maybe_get_invalid_after_bits (access_range *out) const +{ + access_range valid_bits (get_valid_bits ()); + access_range actual_bits (get_actual_bits ()); + + if (actual_bits.m_next <= valid_bits.m_next) + { + /* No part of accessed range is after the valid range. */ + return false; + } + else if (actual_bits.m_start < valid_bits.m_next) + { + /* Get part of accessed range that's after the valid range. */ + *out = access_range (valid_bits.m_next, actual_bits.m_next); + return true; + } + else + { + /* Accessed range is fully after valid range. */ + *out = actual_bits; + return true; + } +} + +/* A class for capturing all of the region offsets of interest (both concrete + and symbolic), to help align everything in the diagram. + Boundaries can be soft or hard; hard boundaries are emphasized visually + (e.g. the boundary between valid vs invalid accesses). + + Offsets in the boundaries are all expressed relative to the base + region of the access_operation. 
*/

class boundaries
{
public:
  enum class kind { HARD, SOFT};

  boundaries (const region &base_reg)
  : m_base_reg (base_reg)
  {
  }

  /* Record OFFSET as a boundary.  Once an offset has been added as
     HARD it stays hard, even if it is also added as SOFT.  */
  void add (region_offset offset, enum kind k)
  {
    m_all_offsets.insert (offset);
    if (k == kind::HARD)
      m_hard_offsets.insert (offset);
  }

  /* Record both ends of RANGE as boundaries.  */
  void add (const access_range &range, enum kind kind)
  {
    add (range.m_start, kind);
    add (range.m_next, kind);
  }

  /* Record both ends of REG as boundaries.  */
  void add (const region &reg, region_model_manager *mgr, enum kind kind)
  {
    add (access_range (reg.get_offset (mgr),
                       reg.get_next_offset (mgr)),
         kind);
  }

  /* Record both ends of BYTES (relative to the base region) as
     boundaries.  */
  void add (const byte_range bytes, enum kind kind)
  {
    add (access_range (&m_base_reg, bytes), kind);
  }

  /* Record a soft boundary at the start of every byte in BYTES, and
     one at the end of the final byte (hence the inclusive upper bound
     on the loop).  */
  void add_all_bytes_in_range (const byte_range &bytes)
  {
    for (byte_offset_t byte_idx = bytes.get_start_byte_offset ();
         byte_idx <= bytes.get_next_byte_offset ();
         byte_idx = byte_idx + 1)
      add (region_offset::make_concrete (&m_base_reg,
                                         byte_idx * BITS_PER_UNIT),
           kind::SOFT);
  }

  /* As above, but for an access_range, which must be expressible as a
     concrete byte range.  */
  void add_all_bytes_in_range (const access_range &range)
  {
    byte_range bytes (0, 0);
    bool valid = range.as_concrete_byte_range (&bytes);
    gcc_assert (valid);
    add_all_bytes_in_range (bytes);
  }

  /* Write all of the boundaries to LOGGER, one per line, each tagged
     with "HARD" or "soft".  */
  void log (logger &logger) const
  {
    logger.log ("boundaries:");
    logger.inc_indent ();
    for (auto offset : m_all_offsets)
      {
        enum kind k = get_kind (offset);
        logger.start_log_line ();
        logger.log_partial ("%s: ", (k == kind::HARD) ? "HARD" : "soft");
        offset.dump_to_pp (logger.get_printer (), true);
        logger.end_log_line ();
      }
    logger.dec_indent ();
  }

  /* Get the kind of OFFSET, which must already have been added.  */
  enum kind get_kind (region_offset offset) const
  {
    gcc_assert (m_all_offsets.find (offset) != m_all_offsets.end ());
    if (m_hard_offsets.find (offset) != m_hard_offsets.end ())
      return kind::HARD;
    else
      return kind::SOFT;
  }

  /* Iteration over all of the offsets, hard and soft alike.  */
  std::set<region_offset>::const_iterator begin () const
  {
    return m_all_offsets.begin ();
  }
  std::set<region_offset>::const_iterator end () const
  {
    return m_all_offsets.end ();
  }
  std::set<region_offset>::size_type size () const
  {
    return m_all_offsets.size ();
  }

private:
  const region &m_base_reg;
  std::set<region_offset> m_all_offsets;   // Every boundary, hard or soft
  std::set<region_offset> m_hard_offsets;  // The subset that are hard
};

/* A widget that wraps a table but offloads column-width calculation
   to a shared object, so that we can vertically line up multiple tables
   and have them all align their columns.

   For example, in:

   01| +---+---+---+---+---+---+----------+-----+-----+-----+-----+-----+-----+
   02| |[0]|[1]|[2]|[3]|[4]|[5]|   ...    |[440]|[441]|[442]|[443]|[444]|[445]|
   03| +---+---+---+---+---+---+          +-----+-----+-----+-----+-----+-----+
   04| |'L'|'o'|'r'|'e'|'m'|' '|          | 'o' | 'r' | 'u' | 'm' | '.' | NUL |
   05| +---+---+---+---+---+---+----------+-----+-----+-----+-----+-----+-----+
   06| |                  string literal (type: 'char[446]')                  |
   07| +----------------------------------------------------------------------+
   08|   |   |   |   |   |   |    |  |  |    |     |     |     |     |     |
   09|   |   |   |   |   |   |    |  |  |    |     |     |     |     |     |
   10|   v   v   v   v   v   v    v  v  v    v     v     v     v     v     v
   11| +---+---------------------+----++--------------------------------------+
   12| |[0]|         ...
|[99]|| after valid range | + 13|+---+---------------------+----+| | + 14|| 'buf' (type: 'char[100]') || | + 15|+------------------------------++--------------------------------------+ + 16||~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~||~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~| + 17| | | + 18| +---------+---------+ +----------+----------+ + 19| |capacity: 100 bytes| |overflow of 346 bytes| + 20| +-------------------+ +---------------------+ + + rows 01-07 and rows 11-15 are x_aligned_table_widget instances. */ + +class x_aligned_table_widget : public leaf_widget +{ +public: + x_aligned_table_widget (table t, + const theme &theme, + table_dimension_sizes &col_widths) + : m_table (std::move (t)), + m_theme (theme), + m_col_widths (col_widths), + m_row_heights (t.get_size ().h), + m_cell_sizes (m_col_widths, m_row_heights), + m_tg (m_table, m_cell_sizes) + { + } + + const char *get_desc () const override + { + return "x_aligned_table_widget"; + } + + canvas::size_t calc_req_size () final override + { + /* We don't compute the size requirements; + the parent should have done this. */ + return m_tg.get_canvas_size (); + } + + void paint_to_canvas (canvas &canvas) final override + { + m_table.paint_to_canvas (canvas, + get_top_left (), + m_tg, + m_theme); + } + + const table &get_table () const { return m_table; } + table_cell_sizes &get_cell_sizes () { return m_cell_sizes; } + void recalc_coords () + { + m_tg.recalc_coords (); + } + +private: + table m_table; + const theme &m_theme; + table_dimension_sizes &m_col_widths; // Reference to shared column widths + table_dimension_sizes m_row_heights; // Unique row heights + table_cell_sizes m_cell_sizes; + table_geometry m_tg; +}; + +/* A widget for printing arrows between the accessed region + and the svalue, showing the direction of the access. + + For example, in: + + 01| +---+---+---+---+---+---+----------+-----+-----+-----+-----+-----+-----+ + 02| |[0]|[1]|[2]|[3]|[4]|[5]| ... 
|[440]|[441]|[442]|[443]|[444]|[445]| + 03| +---+---+---+---+---+---+ +-----+-----+-----+-----+-----+-----+ + 04| |'L'|'o'|'r'|'e'|'m'|' '| | 'o' | 'r' | 'u' | 'm' | '.' | NUL | + 05| +---+---+---+---+---+---+----------+-----+-----+-----+-----+-----+-----+ + 06| | string literal (type: 'char[446]') | + 07| +----------------------------------------------------------------------+ + 08| | | | | | | | | | | | | | | | + 09| | | | | | | | | | | | | | | | + 10| v v v v v v v v v v v v v v v + 11|+---+---------------------+----++--------------------------------------+ + 12||[0]| ... |[99]|| after valid range | + 13|+---+---------------------+----+| | + 14|| 'buf' (type: 'char[100]') || | + 15|+------------------------------++--------------------------------------+ + 16||~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~||~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~| + 17| | | + 18| +---------+---------+ +----------+----------+ + 19| |capacity: 100 bytes| |overflow of 346 bytes| + 20| +-------------------+ +---------------------+ + + rows 8-10 are the direction widget. */ + +class direction_widget : public leaf_widget +{ +public: + direction_widget (const access_diagram_impl &dia_impl, + const bit_to_table_map &btm) + : leaf_widget (), + m_dia_impl (dia_impl), + m_btm (btm) + { + } + const char *get_desc () const override + { + return "direction_widget"; + } + canvas::size_t calc_req_size () final override + { + /* Get our width from our siblings. */ + return canvas::size_t (0, 3); + } + void paint_to_canvas (canvas &canvas) final override; + +private: + const access_diagram_impl &m_dia_impl; + const bit_to_table_map &m_btm; +}; + +/* A widget for adding an x_ruler to a diagram based on table columns, + offloading column-width calculation to shared objects, so that the ruler + lines up with other tables in the diagram. + + For example, in: + + 01| +---+---+---+---+---+---+----------+-----+-----+-----+-----+-----+-----+ + 02| |[0]|[1]|[2]|[3]|[4]|[5]| ... 
|[440]|[441]|[442]|[443]|[444]|[445]| + 03| +---+---+---+---+---+---+ +-----+-----+-----+-----+-----+-----+ + 04| |'L'|'o'|'r'|'e'|'m'|' '| | 'o' | 'r' | 'u' | 'm' | '.' | NUL | + 05| +---+---+---+---+---+---+----------+-----+-----+-----+-----+-----+-----+ + 06| | string literal (type: 'char[446]') | + 07| +----------------------------------------------------------------------+ + 08| | | | | | | | | | | | | | | | + 09| | | | | | | | | | | | | | | | + 10| v v v v v v v v v v v v v v v + 11|+---+---------------------+----++--------------------------------------+ + 12||[0]| ... |[99]|| after valid range | + 13|+---+---------------------+----+| | + 14|| 'buf' (type: 'char[100]') || | + 15|+------------------------------++--------------------------------------+ + 16||~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~||~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~| + 17| | | + 18| +---------+---------+ +----------+----------+ + 19| |capacity: 100 bytes| |overflow of 346 bytes| + 20| +-------------------+ +---------------------+ + + rows 16-20 are the x_aligned_x_ruler_widget. 
*/

class x_aligned_x_ruler_widget : public leaf_widget
{
public:
  x_aligned_x_ruler_widget (const access_diagram_impl &dia_impl,
                            const theme &theme,
                            table_dimension_sizes &col_widths)
  : m_dia_impl (dia_impl),
    m_theme (theme),
    m_col_widths (col_widths)
  {
  }

  const char *get_desc () const override
  {
    return "x_aligned_ruler_widget";
  }

  /* Queue up a label with the given TEXT and STYLE_ID, covering the
     table columns in X_RANGE.  */
  void add_range (const table::range_t &x_range,
                  styled_string text,
                  style::id_t style_id)
  {
    m_labels.emplace_back (x_range, std::move (text), style_id);
  }

  /* The ruler is rebuilt from scratch on each size request...  */
  canvas::size_t calc_req_size () final override
  {
    x_ruler ruler (make_x_ruler ());
    return ruler.get_size ();
  }

  /* ...and again when painting.  */
  void paint_to_canvas (canvas &canvas) final override
  {
    x_ruler ruler (make_x_ruler ());
    ruler.paint_to_canvas (canvas, get_top_left (), m_theme);
  }

private:
  /* A pending label: text and style, plus the range of table columns
     that it applies to.  */
  struct label
  {
    label (const table::range_t &table_x_range,
           styled_string text,
           style::id_t style_id)
    : m_table_x_range (table_x_range),
      m_text (std::move (text)),
      m_style_id (style_id)
    {
    }

    table::range_t m_table_x_range;
    styled_string m_text;
    style::id_t m_style_id;
  };

  x_ruler make_x_ruler () const;

  const access_diagram_impl &m_dia_impl;
  const theme &m_theme;
  table_dimension_sizes &m_col_widths;
  std::vector<label> m_labels;
};

/* A two-way mapping between access_ranges and table columns, for use by
   spatial_item subclasses for creating tables.
   For example when visualizing a bogus access of 'int arr[10];'
   at 'arr[10]', we might have:
   - table column 0 is "bytes 0-3" (for arr[0])
   - table column 1 is "bytes 4-35" (for arr[1] through arr[8])
   - table column 2 is "bytes 36-39" (for arr[9])
   - table column 3 is blank to emphasize a hard boundary between
     valid/invalid accesses.
+ - table column 4 is "bytes 40-44" (for arr[10]) + + We store this as a pair of maps from region_offset to table x; in + the abvove example: + + region offset table_x prev_table_x + bit 0 (aka byte 0) 0 (none) + bit 32 (aka byte 4) 1 0 + bit 288 (aka byte 36) 2 1 + bit 320 (aka byte 40) 4 2 + bit 352 (aka byte 44) (none) (none) + + so that e.g given the half-open byte range [0, 40) + we can determine the closed range of table x [0, 2]. */ + +class bit_to_table_map +{ +public: + /* Populate m_table_x_for_bit and m_bit_for_table_x. */ + void populate (const boundaries &boundaries, logger *logger) + { + LOG_SCOPE (logger); + + int table_x = 0; + std::vector <region_offset> vec_boundaries (boundaries.begin (), + boundaries.end ()); + + /* Sort into an order that makes sense. */ + std::sort (vec_boundaries.begin (), + vec_boundaries.end ()); + + if (logger) + { + logger->log ("vec_boundaries"); + logger->inc_indent (); + for (unsigned idx = 0; idx < vec_boundaries.size (); idx++) + { + logger->start_log_line (); + logger->log_partial ("idx: %i: ", idx); + vec_boundaries[idx].dump_to_pp (logger->get_printer (), true); + logger->end_log_line (); + } + logger->dec_indent (); + } + + for (size_t idx = 0; idx < vec_boundaries.size (); idx++) + { + const region_offset &offset = vec_boundaries[idx]; + if (idx > 0 && (idx + 1) < vec_boundaries.size ()) + { + if (boundaries.get_kind (offset) == boundaries::kind::HARD) + table_x += 1; + } + m_table_x_for_offset[offset] = table_x; + if ((idx + 1) < vec_boundaries.size ()) + { + const region_offset &next_offset = vec_boundaries[idx + 1]; + m_table_x_for_prev_offset[next_offset] = table_x; + m_range_for_table_x[table_x] = access_range (offset, next_offset); + } + table_x += 1; + } + m_num_columns = table_x - 1; + + if (logger) + log (*logger); + } + + unsigned get_num_columns () const + { + return m_num_columns; + } + + table::range_t get_table_x_for_range (const access_range &range) const + { + return table::range_t 
(get_table_x_for_offset (range.m_start), + get_table_x_for_prev_offset (range.m_next) + 1); + } + + table::rect_t get_table_rect (const access_range &range, + const int table_y, const int table_h) const + { + const table::range_t x_range (get_table_x_for_range (range)); + return table::rect_t (table::coord_t (x_range.start, table_y), + table::size_t (x_range.get_size (), table_h)); + } + + table::rect_t get_table_rect (const region *base_reg, + const bit_range &bits, + const int table_y, const int table_h) const + { + const access_range range (base_reg, bits); + return get_table_rect (range, table_y, table_h); + } + + table::rect_t get_table_rect (const region *base_reg, + const byte_range &bytes, + const int table_y, const int table_h) const + { + return get_table_rect (base_reg, bytes.as_bit_range (), table_y, table_h); + } + + bool maybe_get_access_range_for_table_x (int table_x, + access_range *out) const + { + auto slot = m_range_for_table_x.find (table_x); + if (slot == m_range_for_table_x.end ()) + return false; + *out = slot->second; + return true; + } + + void log (logger &logger) const + { + logger.log ("table columns"); + logger.inc_indent (); + for (unsigned table_x = 0; table_x < get_num_columns (); table_x++) + { + logger.start_log_line (); + logger.log_partial ("table_x: %i", table_x); + access_range range_for_column (NULL, bit_range (0, 0)); + if (maybe_get_access_range_for_table_x (table_x, &range_for_column)) + { + logger.log_partial (": range: "); + range_for_column.dump_to_pp (logger.get_printer (), true); + } + logger.end_log_line (); + } + logger.dec_indent (); + } + +private: + int get_table_x_for_offset (region_offset offset) const + { + auto slot = m_table_x_for_offset.find (offset); + + /* If this fails, then we probably failed to fully populate m_boundaries + in find_boundaries. 
*/ + gcc_assert (slot != m_table_x_for_offset.end ()); + + return slot->second; + } + + int get_table_x_for_prev_offset (region_offset offset) const + { + auto slot = m_table_x_for_prev_offset.find (offset); + + /* If this fails, then we probably failed to fully populate m_boundaries + in find_boundaries. */ + gcc_assert (slot != m_table_x_for_prev_offset.end ()); + + return slot->second; + } + + std::map<region_offset, int> m_table_x_for_offset; + std::map<region_offset, int> m_table_x_for_prev_offset; + std::map<int, access_range> m_range_for_table_x; + unsigned m_num_columns; +}; + +/* Base class for something in the diagram that participates + in two steps of diagram creation: + (a) populating a boundaries instance with the boundaries of interest + (b) creating a table instance for itself. + + Offsets in the boundaries are all expressed relative to the base + region of the access_operation. */ + +class spatial_item +{ +public: + virtual void add_boundaries (boundaries &out, logger *) const = 0; + + virtual table make_table (const bit_to_table_map &btm, + style_manager &sm) const = 0; +}; + +/* Subclass of spatial_item for visualizing the region of memory + that's valid to access relative to the base region of region accessed in + the operation. */ + +class valid_region_spatial_item : public spatial_item +{ +public: + valid_region_spatial_item (const access_operation &op, + diagnostic_event_id_t region_creation_event_id) + : m_op (op), + m_region_creation_event_id (region_creation_event_id) + {} + + void add_boundaries (boundaries &out, logger *logger) const final override + { + LOG_SCOPE (logger); + access_range valid_bits = m_op.get_valid_bits (); + if (logger) + { + logger->start_log_line (); + logger->log_partial ("valid bits: "); + valid_bits.dump_to_pp (logger->get_printer (), true); + logger->end_log_line (); + } + out.add (valid_bits, boundaries::kind::HARD); + + /* Support for showing first and final element in array types. 
*/
+    if (tree base_type = m_op.m_base_region->get_type ())
+      if (TREE_CODE (base_type) == ARRAY_TYPE)
+        {
+          if (logger)
+            logger->log ("showing first and final element in array type");
+          region_model_manager *mgr = m_op.m_model.get_manager ();
+          /* An ARRAY_TYPE need not have an index domain at all, so check
+             DOMAIN itself before asking for its bounds.  */
+          tree domain = TYPE_DOMAIN (base_type);
+          if (domain && TYPE_MIN_VALUE (domain) && TYPE_MAX_VALUE (domain))
+            {
+              const svalue *min_idx_sval
+                = mgr->get_or_create_constant_svalue (TYPE_MIN_VALUE (domain));
+              const svalue *max_idx_sval
+                = mgr->get_or_create_constant_svalue (TYPE_MAX_VALUE (domain));
+              const region *min_element =
+                mgr->get_element_region (m_op.m_base_region,
+                                         TREE_TYPE (base_type),
+                                         min_idx_sval);
+              out.add (*min_element, mgr, boundaries::kind::SOFT);
+              const region *max_element =
+                mgr->get_element_region (m_op.m_base_region,
+                                         TREE_TYPE (base_type),
+                                         max_idx_sval);
+              out.add (*max_element, mgr, boundaries::kind::SOFT);
+            }
+        }
+  }
+
+  /* Subroutine of make_table when base region has ARRAY_TYPE.
+
+     Add a row to T and fill it with:
+     - a cell labelled with the minimum index, spanning the table columns
+       of the first element,
+     - a cell labelled with the maximum index, spanning the table columns
+       of the final element (skipped for a 1-element array), and
+     - a "..." cell covering any table columns in between.
+
+     Do nothing (adding no row) if the array type has no index domain,
+     or the domain lacks a minimum or maximum value.  */
+  void add_array_elements_to_table (table &t,
+                                    const bit_to_table_map &btm,
+                                    style_manager &sm) const
+  {
+    tree base_type = m_op.m_base_region->get_type ();
+    gcc_assert (TREE_CODE (base_type) == ARRAY_TYPE);
+
+    /* Guard against a missing domain as well as missing bounds;
+       TYPE_MIN_VALUE/TYPE_MAX_VALUE must not be applied to a null tree.  */
+    tree domain = TYPE_DOMAIN (base_type);
+    if (!(domain && TYPE_MIN_VALUE (domain) && TYPE_MAX_VALUE (domain)))
+      return;
+
+    region_model_manager * const mgr = m_op.get_manager ();
+    const int table_y = 0;
+    const int table_h = 1;
+    const table::range_t table_y_range (table_y, table_y + table_h);
+
+    t.add_row ();
+    const svalue *min_idx_sval
+      = mgr->get_or_create_constant_svalue (TYPE_MIN_VALUE (domain));
+    const region *min_element = mgr->get_element_region (m_op.m_base_region,
+                                                          TREE_TYPE (base_type),
+                                                          min_idx_sval);
+    const access_range min_element_range (*min_element, mgr);
+    const table::range_t min_element_x_range
+      = btm.get_table_x_for_range (min_element_range);
+
+    t.set_cell_span (table::rect_t (min_element_x_range,
+                                    table_y_range),
+                     fmt_styled_string (sm, "[%E]",
+                                        TYPE_MIN_VALUE (domain)));
+
+    const svalue *max_idx_sval
+      = mgr->get_or_create_constant_svalue (TYPE_MAX_VALUE (domain));
+    const region *max_element = mgr->get_element_region (m_op.m_base_region,
+                                                          TREE_TYPE (base_type),
+                                                          max_idx_sval);
+    if (min_element == max_element)
+      return; // 1-element array
+
+    const access_range max_element_range (*max_element, mgr);
+    const table::range_t max_element_x_range
+      = btm.get_table_x_for_range (max_element_range);
+    t.set_cell_span (table::rect_t (max_element_x_range,
+                                    table_y_range),
+                     fmt_styled_string (sm, "[%E]",
+                                        TYPE_MAX_VALUE (domain)));
+
+    /* Everything strictly between the first and final elements is
+       summarized by a single ellipsis cell.  */
+    const table::range_t other_elements_x_range (min_element_x_range.next,
+                                                  max_element_x_range.start);
+    if (other_elements_x_range.get_size () > 0)
+      t.set_cell_span (table::rect_t (other_elements_x_range, table_y_range),
+                       styled_string (sm, "..."));
+  }
+
+  table make_table (const bit_to_table_map &btm,
+                    style_manager &sm) const final override
+  {
+    table t (table::size_t (btm.get_num_columns (), 1));
+
+    if (tree base_type = m_op.m_base_region->get_type ())
+      if (TREE_CODE (base_type) == ARRAY_TYPE)
+        add_array_elements_to_table (t, btm, sm);
+
+    access_range valid_bits = m_op.get_valid_bits ();
+    const int table_y = t.get_size ().h - 1;
+    const int table_h = 1;
+    table::rect_t rect = btm.get_table_rect (valid_bits, table_y, table_h);
+    styled_string s;
+    switch (m_op.m_base_region->get_kind ())
+      {
+      default:
+        s = styled_string (sm, _("region"));
+        break;
+      case RK_DECL:
+        {
+          const decl_region *decl_reg
+            = as_a <const decl_region *> (m_op.m_base_region);
+          tree decl = decl_reg->get_decl ();
+          s = fmt_styled_string (sm, "%qE (type: %qT)",
+                                 decl,
+                                 TREE_TYPE (decl));
+        }
+        break;
+      case RK_HEAP_ALLOCATED:
+        {
+          if (m_region_creation_event_id.known_p ())
+            s = fmt_styled_string (sm, _("buffer allocated on heap at %@"),
+                                   &m_region_creation_event_id);
+          else
+            s = styled_string (sm, _("heap-allocated buffer"));
+        }
+        break;
+      case RK_ALLOCA:
+        {
+          if (m_region_creation_event_id.known_p ())
+            s =
fmt_styled_string (sm, _("buffer allocated on stack at %@"), + &m_region_creation_event_id); + else + s = styled_string (sm, _("stack-allocated buffer")); + } + break; + case RK_STRING: + { + const string_region *string_reg + = as_a <const string_region *> (m_op.m_base_region); + tree string_cst = string_reg->get_string_cst (); + s = fmt_styled_string (sm, _("string literal (type: %qT)"), + TREE_TYPE (string_cst)); + } + break; + } + t.set_cell_span (rect, std::move (s)); + + return t; + } + +private: + const access_operation &m_op; + diagnostic_event_id_t m_region_creation_event_id; +}; + +/* Subclass of spatial_item for visualizing the region of memory + that's actually accessed by the read or write, for reads and + for write cases where we don't know the svalue written. */ + +class accessed_region_spatial_item : public spatial_item +{ +public: + accessed_region_spatial_item (const access_operation &op) : m_op (op) {} + + void add_boundaries (boundaries &out, logger *logger) const final override + { + LOG_SCOPE (logger); + access_range actual_bits = m_op.get_actual_bits (); + if (logger) + { + logger->start_log_line (); + logger->log_partial ("actual bits: "); + actual_bits.dump_to_pp (logger->get_printer (), true); + logger->end_log_line (); + } + out.add (actual_bits, boundaries::kind::HARD); + } + + table make_table (const bit_to_table_map &btm, + style_manager &sm) const final override + { + table t (table::size_t (btm.get_num_columns (), 1)); + + access_range actual_bits = m_op.get_actual_bits (); + const int table_y = 0; + const int table_h = 1; + table::rect_t rect = btm.get_table_rect (actual_bits, table_y, table_h); + t.set_cell_span (rect, styled_string (get_label_string (sm))); + + return t; + } + +private: + styled_string get_label_string (style_manager &sm) const + { + const access_range accessed_bits (m_op.get_actual_bits ()); + return get_access_size_str (sm, + m_op, + accessed_bits, + m_op.m_reg.get_type ()); + } + + const access_operation &m_op; 
+}; + +/* Subclass of spatial_item for when we know the svalue being written + to the accessed region. + Can be subclassed to give visualizations of specific kinds of svalue. */ + +class svalue_spatial_item : public spatial_item +{ +public: + static std::unique_ptr<svalue_spatial_item> make (const access_operation &op, + const svalue &sval, + access_range actual_bits, + const theme &theme); + + svalue_spatial_item (const access_operation &op, + const svalue &sval, + access_range actual_bits) + : m_op (op), m_sval (sval), m_actual_bits (actual_bits) + {} + + void add_boundaries (boundaries &out, logger *logger) const override + { + LOG_SCOPE (logger); + out.add (m_actual_bits, boundaries::kind::HARD); + } + + table make_table (const bit_to_table_map &btm, + style_manager &sm) const override + { + table t (table::size_t (btm.get_num_columns (), 0)); + + const int table_y = t.add_row (); + const int table_h = 1; + table::rect_t rect = btm.get_table_rect (m_actual_bits, table_y, table_h); + t.set_cell_span (rect, styled_string (get_label_string (sm))); + return t; + } + +protected: + styled_string get_label_string (style_manager &sm) const + { + tree rep_tree = m_op.m_model.get_representative_tree (&m_sval); + if (rep_tree) + { + if (TREE_CODE (rep_tree) == SSA_NAME) + rep_tree = SSA_NAME_VAR (rep_tree); + switch (TREE_CODE (rep_tree)) + { + default: + break; + case INTEGER_CST: + return fmt_styled_string (sm, _("write of %<(%T) %E%>"), + TREE_TYPE (rep_tree), + rep_tree); + + case PARM_DECL: + case VAR_DECL: + return fmt_styled_string (sm, _("write from %qE (type: %qT)"), + rep_tree, + TREE_TYPE (rep_tree)); + break; + } + } + + const access_range accessed_bits (m_op.get_actual_bits ()); + return get_access_size_str (sm, + m_op, + accessed_bits, + m_sval.get_type ()); + } + + const access_operation &m_op; + const svalue &m_sval; + access_range m_actual_bits; +}; + +/* Subclass of svalue_spatial_item for initial_svalue of a string_region + i.e. for string literals. 
+ + There are three cases: + (a) for long strings, show just the head and tail of the string, + with an ellipsis: + +---+---+---+---+---+---+----------+-----+-----+-----+-----+-----+-----+ + |[0]|[1]|[2]|[3]|[4]|[5]| |[440]|[441]|[442]|[443]|[444]|[445]| + +---+---+---+---+---+---+ ... +-----+-----+-----+-----+-----+-----+ + |‘L’|‘o’|‘r’|‘e’|‘m’|‘ ’| | ‘o’ | ‘r’ | ‘u’ | ‘m’ | ‘.’ | NUL | + +---+---+---+---+---+---+----------+-----+-----+-----+-----+-----+-----+ + | string literal (type: ‘char[446]’) | + +----------------------------------------------------------------------+ + (b) For sufficiently short strings, show the full string: + +----------+---------+---------+---------+---------+ +-----------------+ + | [0] | [1] | [2] | [3] | [4] | | [5] | + +----------+---------+---------+---------+---------+ +-----------------+ + | ‘h’ | ‘e’ | ‘l’ | ‘l’ | ‘o’ | | NUL | + +----------+---------+---------+---------+---------+-+-----------------+ + | string literal (type: ‘char[6]’) | + +----------------------------------------------------------------------+ + (c) for non-ASCII strings that are short enough to show the full string, + show how unicode code points of the bytes decoded as UTF-8: + +-----+-----+-----+----+----++----+----+----+----+----+----+----+------+ + | [0] | [1] | [2] |[3] |[4] ||[5] |[6] |[7] |[8] |[9] |[10]|[11]| [12] | + +-----+-----+-----+----+----++----+----+----+----+----+----+----+------+ + |0xe6 |0x96 |0x87 |0xe5|0xad||0x97|0xe5|0x8c|0x96|0xe3|0x81|0x91| 0x00 | + +-----+-----+-----+----+----++----+----+----+----+----+----+----+------+ + | U+6587 | U+5b57 | U+5316 | U+3051 |U+0000| + +-----------------+---------------+--------------+--------------+------+ + | string literal (type: ‘char[13]’) | + +----------------------------------------------------------------------+ + and show the characters themselves if unicode is supported and they are not + control characters: + ┌─────┬─────┬─────┬────┬────┐┌────┬────┬────┬────┬────┬────┬────┬──────┐ + │ [0] │ 
[1] │ [2] │[3] │[4] ││[5] │[6] │[7] │[8] │[9] │[10]│[11]│ [12] │ + ├─────┼─────┼─────┼────┼────┤├────┼────┼────┼────┼────┼────┼────┼──────┤ + │0xe6 │0x96 │0x87 │0xe5│0xad││0x97│0xe5│0x8c│0x96│0xe3│0x81│0x91│ 0x00 │ + ├─────┴─────┴─────┼────┴────┴┴────┼────┴────┴────┼────┴────┴────┼──────┤ + │ U+6587 │ U+5b57 │ U+5316 │ U+3051 │U+0000│ + ├─────────────────┼───────────────┼──────────────┼──────────────┼──────┤ + │ 文 │ 字 │ 化 │ け │ NUL │ + ├─────────────────┴───────────────┴──────────────┴──────────────┴──────┤ + │ string literal (type: ‘char[13]’) │ + └──────────────────────────────────────────────────────────────────────┘ +*/ + +class string_region_spatial_item : public svalue_spatial_item +{ +public: + string_region_spatial_item (const access_operation &op, + const svalue &sval, + access_range actual_bits, + const string_region &string_reg, + const theme &theme) + : svalue_spatial_item (op, sval, actual_bits), + m_string_reg (string_reg), + m_theme (theme), + m_ellipsis_threshold (param_analyzer_text_art_string_ellipsis_threshold), + m_ellipsis_head_len (param_analyzer_text_art_string_ellipsis_head_len), + m_ellipsis_tail_len (param_analyzer_text_art_string_ellipsis_tail_len), + m_show_full_string (calc_show_full_string ()), + m_show_utf8 (m_show_full_string && !pure_ascii_p ()) + { + } + + void add_boundaries (boundaries &out, logger *logger) const override + { + LOG_SCOPE (logger); + out.add (m_actual_bits, boundaries::kind::HARD); + + tree string_cst = get_string_cst (); + /* TREE_STRING_LENGTH is sizeof, not strlen. 
*/ + if (m_show_full_string) + out.add_all_bytes_in_range (m_actual_bits); + else + { + byte_range head_of_string (0, m_ellipsis_head_len); + out.add_all_bytes_in_range (head_of_string); + byte_range tail_of_string + (TREE_STRING_LENGTH (string_cst) - m_ellipsis_tail_len, + m_ellipsis_tail_len); + out.add_all_bytes_in_range (tail_of_string); + /* Adding the above pair of ranges will also effectively add + the boundaries of the range of ellipsized chars, as they're + exactly in between head_of_string and tail_of_string. */ + } + } + + table make_table (const bit_to_table_map &btm, + style_manager &sm) const override + { + table t (table::size_t (btm.get_num_columns (), 0)); + + const int byte_idx_table_y = t.add_row (); + const int byte_val_table_y = t.add_row (); + + byte_range bytes (0, 0); + bool valid = m_actual_bits.as_concrete_byte_range (&bytes); + gcc_assert (valid); + tree string_cst = get_string_cst (); + if (m_show_full_string) + { + for (byte_offset_t byte_idx = bytes.get_start_byte_offset (); + byte_idx < bytes.get_next_byte_offset (); + byte_idx = byte_idx + 1) + add_column_for_byte (t, btm, sm, byte_idx, + byte_idx_table_y, byte_val_table_y); + + if (m_show_utf8) + { + const bool show_unichars = m_theme.unicode_p (); + const int utf8_code_point_table_y = t.add_row (); + int utf8_character_table_y; + if (show_unichars) + utf8_character_table_y = t.add_row (); + + /* We don't actually want the display widths here, but + it's an easy way to decode UTF-8. 
*/ + cpp_char_column_policy policy (8, cpp_wcwidth); + cpp_display_width_computation dw (TREE_STRING_POINTER (string_cst), + TREE_STRING_LENGTH (string_cst), + policy); + while (!dw.done ()) + { + cpp_decoded_char decoded_char; + dw.process_next_codepoint (&decoded_char); + + if (!decoded_char.m_valid_ch) + continue; + size_t start_byte_idx + = decoded_char.m_start_byte - TREE_STRING_POINTER (string_cst); + byte_size_t size_in_bytes + = decoded_char.m_next_byte - decoded_char.m_start_byte; + byte_range bytes (start_byte_idx, size_in_bytes); + + const table::rect_t code_point_table_rect + = btm.get_table_rect (&m_string_reg, bytes, + utf8_code_point_table_y, 1); + char buf[100]; + sprintf (buf, "U+%04x", decoded_char.m_ch); + t.set_cell_span (code_point_table_rect, + styled_string (sm, buf)); + + if (show_unichars) + { + const table::rect_t character_table_rect + = btm.get_table_rect (&m_string_reg, bytes, + utf8_character_table_y, 1); + if (cpp_is_printable_char (decoded_char.m_ch)) + t.set_cell_span (character_table_rect, + styled_string (decoded_char.m_ch)); + else if (decoded_char.m_ch == 0) + t.set_cell_span (character_table_rect, + styled_string (sm, "NUL")); + else + t.set_cell_span (character_table_rect, + styled_string (sm, "")); + } + } + } + } + else + { + /* Head of string. */ + for (int byte_idx = 0; byte_idx < m_ellipsis_head_len; byte_idx++) + add_column_for_byte (t, btm, sm, byte_idx, + byte_idx_table_y, byte_val_table_y); + + /* Ellipsis (two rows high). */ + const byte_range ellipsis_bytes + (m_ellipsis_head_len, + TREE_STRING_LENGTH (string_cst) + - (m_ellipsis_head_len + m_ellipsis_tail_len)); + const table::rect_t table_rect + = btm.get_table_rect (&m_string_reg, ellipsis_bytes, + byte_idx_table_y, 2); + t.set_cell_span(table_rect, styled_string (sm, "...")); + + /* Tail of string. 
*/ + for (int byte_idx + = (TREE_STRING_LENGTH (string_cst) - m_ellipsis_tail_len); + byte_idx < TREE_STRING_LENGTH (string_cst); + byte_idx++) + add_column_for_byte (t, btm, sm, byte_idx, + byte_idx_table_y, byte_val_table_y); + } + + const int summary_table_y = t.add_row (); + t.set_cell_span (btm.get_table_rect (&m_string_reg, bytes, + summary_table_y, 1), + fmt_styled_string (sm, + _("string literal (type: %qT)"), + TREE_TYPE (string_cst))); + + return t; + } + + tree get_string_cst () const { return m_string_reg.get_string_cst (); } + +private: + bool calc_show_full_string () const + { + tree string_cst = get_string_cst (); + if (TREE_STRING_LENGTH (string_cst) < m_ellipsis_threshold) + return true; + if (TREE_STRING_LENGTH (string_cst) < + (m_ellipsis_head_len + m_ellipsis_tail_len)) + return true; + return false; + } + + bool pure_ascii_p () const + { + tree string_cst = get_string_cst (); + for (unsigned byte_idx = 0; + byte_idx < (unsigned) TREE_STRING_LENGTH (string_cst); + byte_idx++) + { + unsigned char ch = TREE_STRING_POINTER (string_cst)[byte_idx]; + if (ch >= 0x80) + return false; + } + return true; + } + + void add_column_for_byte (table &t, const bit_to_table_map &btm, + style_manager &sm, + const byte_offset_t byte_idx, + const int byte_idx_table_y, + const int byte_val_table_y) const + { + tree string_cst = get_string_cst (); + gcc_assert (byte_idx >= 0); + gcc_assert (byte_idx < TREE_STRING_LENGTH (string_cst)); + + const byte_range bytes (byte_idx, 1); + if (1) // show_byte_indices + { + const table::rect_t idx_table_rect + = btm.get_table_rect (&m_string_reg, bytes, byte_idx_table_y, 1); + t.set_cell_span (idx_table_rect, + fmt_styled_string (sm, "[%li]", + byte_idx.ulow ())); + } + + char byte_val = TREE_STRING_POINTER (string_cst)[byte_idx.ulow ()]; + const table::rect_t val_table_rect + = btm.get_table_rect (&m_string_reg, bytes, byte_val_table_y, 1); + table_cell_content content (make_cell_content_for_byte (sm, byte_val)); + 
t.set_cell_span (val_table_rect, std::move (content)); + } + + table_cell_content make_cell_content_for_byte (style_manager &sm, + unsigned char byte_val) const + { + if (!m_show_utf8) + { + if (byte_val == '\0') + return styled_string (sm, "NUL"); + else if (byte_val < 0x80) + if (ISPRINT (byte_val)) + return fmt_styled_string (sm, "%qc", byte_val); + } + char buf[100]; + sprintf (buf, "0x%02x", byte_val); + return styled_string (sm, buf); + } + + const string_region &m_string_reg; + const theme &m_theme; + const int m_ellipsis_threshold; + const int m_ellipsis_head_len; + const int m_ellipsis_tail_len; + const bool m_show_full_string; + const bool m_show_utf8; +}; + +std::unique_ptr<svalue_spatial_item> +svalue_spatial_item::make (const access_operation &op, + const svalue &sval, + access_range actual_bits, + const theme &theme) +{ + if (const initial_svalue *initial_sval = sval.dyn_cast_initial_svalue ()) + if (const string_region *string_reg + = initial_sval->get_region ()->dyn_cast_string_region ()) + return make_unique <string_region_spatial_item> (op, sval, actual_bits, + *string_reg, theme); + return make_unique <svalue_spatial_item> (op, sval, actual_bits); +} + +/* Widget subclass implementing access diagrams. 
*/ + +class access_diagram_impl : public vbox_widget +{ +public: + access_diagram_impl (const access_operation &op, + diagnostic_event_id_t region_creation_event_id, + style_manager &sm, + const theme &theme, + logger *logger) + : m_op (op), + m_region_creation_event_id (region_creation_event_id), + m_sm (sm), + m_theme (theme), + m_logger (logger), + m_invalid (false), + m_valid_region_spatial_item (op, region_creation_event_id), + m_accessed_region_spatial_item (op), + m_btm (), + m_calc_req_size_called (false) + { + LOG_SCOPE (logger); + + if (logger) + { + access_range invalid_before_bits; + if (op.maybe_get_invalid_before_bits (&invalid_before_bits)) + invalid_before_bits.log ("invalid before range", *logger); + access_range invalid_after_bits; + if (op.maybe_get_invalid_after_bits (&invalid_after_bits)) + invalid_after_bits.log ("invalid after range", *logger); + + if (op.m_sval_hint) + { + logger->start_log_line (); + logger->log_partial ("sval_hint: "); + op.m_sval_hint->dump_to_pp (logger->get_printer (), true); + logger->end_log_line (); + } + } + + /* Register painting styles. */ + { + style valid_style; + valid_style.m_fg_color = style::named_color::GREEN; + valid_style.m_bold = true; + m_valid_style_id = m_sm.get_or_create_id (valid_style); + + style invalid_style; + invalid_style.m_fg_color = style::named_color::RED; + invalid_style.m_bold = true; + m_invalid_style_id = m_sm.get_or_create_id (invalid_style); + } + + if (op.m_sval_hint) + { + access_range actual_bits = m_op.get_actual_bits (); + m_svalue_spatial_item = svalue_spatial_item::make (m_op, + *op.m_sval_hint, + actual_bits, + m_theme); + } + + /* Two passes: + First, figure out all of the boundaries of interest. + Then use that to build child widgets showing the regions of interest, + with a common tabular layout. */ + + m_boundaries = find_boundaries (); + if (logger) + m_boundaries->log (*logger); + + /* Populate m_table_x_for_bit and m_bit_for_table_x. 
+ Each table column represents the range [offset, next_offset). + We don't create a column in the table for the final offset, but we + do populate it, so that looking at the table_x of one beyond the + final table column gives us the upper bound offset. */ + m_btm.populate (*m_boundaries, logger); + + /* Gracefully reject cases where the boundary sorting has gone wrong + (due to awkward combinations of symbolic values). */ + { + table::range_t actual_bits_x_range + = m_btm.get_table_x_for_range (m_op.get_actual_bits ()); + if (actual_bits_x_range.get_size () <= 0) + { + if (logger) + logger->log ("giving up: bad table columns for actual_bits"); + m_invalid = true; + return; + } + table::range_t valid_bits_x_range + = m_btm.get_table_x_for_range (m_op.get_valid_bits ()); + if (valid_bits_x_range.get_size () <= 0) + { + if (logger) + logger->log ("giving up: bad table columns for valid_bits"); + m_invalid = true; + return; + } + } + + m_col_widths + = make_unique <table_dimension_sizes> (m_btm.get_num_columns ()); + + /* Now create child widgets. */ + + if (flag_analyzer_debug_text_art) + { + table t_headings (make_headings_table ()); + add_aligned_child_table (std::move (t_headings)); + } + + if (m_svalue_spatial_item) + { + table t_sval (m_svalue_spatial_item->make_table (m_btm, m_sm)); + add_aligned_child_table (std::move (t_sval)); + } + else + { + table t_accessed + (m_accessed_region_spatial_item.make_table (m_btm, m_sm)); + add_aligned_child_table (std::move (t_accessed)); + } + + add_direction_widget (); + + table t_valid (m_valid_region_spatial_item.make_table (m_btm, m_sm)); + add_invalid_accesses_to_region_table (t_valid); + add_aligned_child_table (std::move (t_valid)); + + add_valid_vs_invalid_ruler (); + } + + const char *get_desc () const override + { + return "access_diagram_impl"; + } + + canvas::size_t calc_req_size () final override + { + if (m_invalid) + return canvas::size_t (0, 0); + + /* Now compute the size requirements for the tables. 
*/ + for (auto iter : m_aligned_table_widgets) + iter->get_cell_sizes ().pass_1 (iter->get_table ()); + for (auto iter : m_aligned_table_widgets) + iter->get_cell_sizes ().pass_2 (iter->get_table ()); + + adjust_to_scale(); + + /* ...and relayout the tables. */ + for (auto iter : m_aligned_table_widgets) + iter->recalc_coords (); + + /* Populate the canvas_x per table_x. */ + m_col_start_x.clear (); + int iter_canvas_x = 0; + for (auto w : m_col_widths->m_requirements) + { + m_col_start_x.push_back (iter_canvas_x); + iter_canvas_x += w + 1; + } + m_col_start_x.push_back (iter_canvas_x); + + m_calc_req_size_called = true; + + return vbox_widget::calc_req_size (); + } + + int get_canvas_x_for_table_x (int table_x) const + { + gcc_assert (m_calc_req_size_called); + return m_col_start_x[table_x]; + } + + canvas::range_t get_canvas_x_range (const table::range_t &table_x_range) const + { + gcc_assert (m_calc_req_size_called); + return canvas::range_t (get_canvas_x_for_table_x (table_x_range.start), + get_canvas_x_for_table_x (table_x_range.next)); + } + + const access_operation &get_op () const { return m_op; } + + style::id_t get_style_id_for_validity (bool is_valid) const + { + return is_valid ? m_valid_style_id : m_invalid_style_id; + } + + const theme &get_theme () const { return m_theme; } + +private: + /* Figure out all of the boundaries of interest when visualizing ths op. 
*/ + std::unique_ptr<boundaries> + find_boundaries () const + { + std::unique_ptr<boundaries> result + = make_unique<boundaries> (*m_op.m_base_region); + + m_valid_region_spatial_item.add_boundaries (*result, m_logger); + m_accessed_region_spatial_item.add_boundaries (*result, m_logger); + if (m_svalue_spatial_item) + m_svalue_spatial_item->add_boundaries (*result, m_logger); + + return result; + } + + void add_aligned_child_table (table t) + { + x_aligned_table_widget *w + = new x_aligned_table_widget (std::move (t), m_theme, *m_col_widths); + m_aligned_table_widgets.push_back (w); + add_child (std::unique_ptr<widget> (w)); + } + + /* Create a table showing headings for use by -fanalyzer-debug-text-art, for + example: + +---------+-----------+-----------+---+--------------------------------+ + | tc0 | tc1 | tc2 |tc3| tc4 | + +---------+-----------+-----------+---+--------------------------------+ + |bytes 0-3|bytes 4-35 |bytes 36-39| | bytes 40-43 | + +---------+-----------+-----------+ +--------------------------------+ + which has: + - a row showing the table column numbers, labelled "tc0", "tc1", etc + - a row showing the memory range of each table column that has one. 
*/ + + table make_headings_table () const + { + table t (table::size_t (m_btm.get_num_columns (), 2)); + + for (int table_x = 0; table_x < t.get_size ().w; table_x++) + { + const int table_y = 0; + t.set_cell (table::coord_t (table_x, table_y), + fmt_styled_string (m_sm, "tc%i", table_x)); + } + for (int table_x = 0; table_x < t.get_size ().w; table_x++) + { + const int table_y = 1; + access_range range_for_column (NULL, bit_range (0, 0)); + if (m_btm.maybe_get_access_range_for_table_x (table_x, + &range_for_column)) + { + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + range_for_column.dump_to_pp (&pp, true); + t.set_cell (table::coord_t (table_x, table_y), + styled_string (m_sm, pp_formatted_text (&pp))); + } + } + + return t; + } + + void add_direction_widget () + { + add_child (::make_unique<direction_widget> (*this, m_btm)); + } + + void add_invalid_accesses_to_region_table (table &t_region) + { + gcc_assert (t_region.get_size ().w == (int)m_btm.get_num_columns ()); + + const int table_y = 0; + const int table_h = t_region.get_size ().h; + + access_range invalid_before_bits; + if (m_op.maybe_get_invalid_before_bits (&invalid_before_bits)) + { + t_region.set_cell_span (m_btm.get_table_rect (invalid_before_bits, + table_y, table_h), + styled_string (m_sm, + _("before valid range"))); + } + access_range invalid_after_bits; + if (m_op.maybe_get_invalid_after_bits (&invalid_after_bits)) + { + t_region.set_cell_span (m_btm.get_table_rect (invalid_after_bits, + table_y, table_h), + styled_string (m_sm, + _("after valid range"))); + } + } + + void maybe_add_gap (x_aligned_x_ruler_widget *w, + const access_range &lower, + const access_range &upper) const + { + LOG_SCOPE (m_logger); + if (m_logger) + { + lower.log ("lower", *m_logger); + upper.log ("upper", *m_logger); + } + tree lower_next = lower.m_next.calc_symbolic_bit_offset (m_op.m_model); + if (!lower_next) + { + if (m_logger) + m_logger->log ("failed to get lower_next"); + return; + } + 
tree upper_start = upper.m_start.calc_symbolic_bit_offset (m_op.m_model); + if (!upper_start) + { + if (m_logger) + m_logger->log ("failed to get upper_start"); + return; + } + tree num_bits_gap = fold_build2 (MINUS_EXPR, + size_type_node, + upper_start, lower_next); + if (m_logger) + m_logger->log ("num_bits_gap: %qE", num_bits_gap); + tree zero = build_int_cst (size_type_node, 0); + tristate ts_gt_zero = m_op.m_model.eval_condition (num_bits_gap, + GT_EXPR, + zero, + NULL); + if (ts_gt_zero.is_false ()) + { + if (m_logger) + m_logger->log ("rejecting as not > 0"); + return; + } + + bit_size_expr num_bits (num_bits_gap); + styled_string label = num_bits.get_formatted_str (m_sm, + _("%wi bit"), + _("%wi bits"), + _("%wi byte"), + _("%wi bytes"), + _("%qE bits"), + _("%qE bytes")); + w->add_range (m_btm.get_table_x_for_range (access_range (lower.m_next, + upper.m_start)), + std::move (label), + style::id_plain); + } + + styled_string + make_warning_string (styled_string &&text) + { + styled_string result; + if (!m_theme.emojis_p ()) + return std::move (text); + + result.append (styled_string (0x26A0, /* U+26A0 WARNING SIGN. */ + true)); + /* U+26A0 WARNING SIGN has East_Asian_Width == Neutral, but in its + emoji variant is printed (by vte at least) with a 2nd half + overlapping the next char. Hence we add two spaces here: a space + to be covered by this overlap, plus another space of padding. */ + result.append (styled_string (m_sm, " ")); + result.append (std::move (text)); + return result; + } + + /* Add a ruler child widet showing valid, invalid, and gaps. 
*/ + void add_valid_vs_invalid_ruler () + { + LOG_SCOPE (m_logger); + + x_aligned_x_ruler_widget *w + = new x_aligned_x_ruler_widget (*this, m_theme, *m_col_widths); + + access_range invalid_before_bits; + if (m_op.maybe_get_invalid_before_bits (&invalid_before_bits)) + { + if (m_logger) + invalid_before_bits.log ("invalid_before_bits", *m_logger); + bit_size_expr num_before_bits; + if (invalid_before_bits.get_size (m_op.m_model, &num_before_bits)) + { + styled_string label; + if (m_op.m_dir == DIR_READ) + label = num_before_bits.get_formatted_str + (m_sm, + _("under-read of %wi bit"), + _("under-read of %wi bits"), + _("under-read of %wi byte"), + _("under-read of %wi bytes"), + _("under-read of %qE bits"), + _("under-read of %qE bytes")); + else + label = num_before_bits.get_formatted_str + (m_sm, + _("underwrite of %wi bit"), + _("underwrite of %wi bits"), + _("underwrite of %wi byte"), + _("underwrite of %wi bytes"), + _("underwrite of %qE bits"), + _("underwrite of %qE bytes")); + w->add_range (m_btm.get_table_x_for_range (invalid_before_bits), + make_warning_string (std::move (label)), + m_invalid_style_id); + } + } + else + { + if (m_logger) + m_logger->log ("no invalid_before_bits"); + } + + /* It would be nice to be able to use std::optional<access_range> here, + but std::optional is C++17. 
*/ + bool got_valid_bits = false; + access_range valid_bits (m_op.get_valid_bits ()); + bit_size_expr num_valid_bits; + if (valid_bits.get_size (m_op.m_model, &num_valid_bits)) + { + if (m_logger) + valid_bits.log ("valid_bits", *m_logger); + + got_valid_bits = true; + maybe_add_gap (w, invalid_before_bits, valid_bits); + + styled_string label; + if (m_op.m_dir == DIR_READ) + label = num_valid_bits.get_formatted_str (m_sm, + _("size: %wi bit"), + _("size: %wi bits"), + _("size: %wi byte"), + _("size: %wi bytes"), + _("size: %qE bits"), + _("size: %qE bytes")); + else + label = num_valid_bits.get_formatted_str (m_sm, + _("capacity: %wi bit"), + _("capacity: %wi bits"), + _("capacity: %wi byte"), + _("capacity: %wi bytes"), + _("capacity: %qE bits"), + _("capacity: %qE bytes")); + w->add_range (m_btm.get_table_x_for_range (m_op.get_valid_bits ()), + std::move (label), + m_valid_style_id); + } + + access_range invalid_after_bits; + if (m_op.maybe_get_invalid_after_bits (&invalid_after_bits)) + { + if (got_valid_bits) + maybe_add_gap (w, valid_bits, invalid_after_bits); + + if (m_logger) + invalid_before_bits.log ("invalid_after_bits", *m_logger); + + bit_size_expr num_after_bits; + if (invalid_after_bits.get_size (m_op.m_model, &num_after_bits)) + { + styled_string label; + if (m_op.m_dir == DIR_READ) + label = num_after_bits.get_formatted_str + (m_sm, + _("over-read of %wi bit"), + _("over-read of %wi bits"), + _("over-read of %wi byte"), + _("over-read of %wi bytes"), + _("over-read of %qE bits"), + _("over-read of %qE bytes")); + else + label = num_after_bits.get_formatted_str + (m_sm, + _("overflow of %wi bit"), + _("overflow of %wi bits"), + _("overflow of %wi byte"), + _("overflow of %wi bytes"), + _("over-read of %qE bits"), + _("overflow of %qE bytes")); + w->add_range (m_btm.get_table_x_for_range (invalid_after_bits), + make_warning_string (std::move (label)), + m_invalid_style_id); + } + } + else + { + if (m_logger) + m_logger->log ("no invalid_after_bits"); 
+ } + + add_child (std::unique_ptr<widget> (w)); + } + + /* Subroutine of calc_req_size. + Try to allocate surplus canvas width to table columns to make the + per table-column canvas widths closer to being to scale. + See e.g.: + https://en.wikipedia.org/wiki/Fair_item_allocation + https://en.wikipedia.org/wiki/Mathematics_of_apportionment + */ + void adjust_to_scale () + { + LOG_SCOPE (m_logger); + const unsigned num_columns = m_btm.get_num_columns (); + std::vector<bit_offset_t> bit_sizes (num_columns); + for (unsigned table_x = 0; table_x < num_columns; table_x++) + { + access_range range_for_column (NULL, bit_range (0, 0)); + if (m_btm.maybe_get_access_range_for_table_x (table_x, + &range_for_column)) + { + bit_size_t size_in_bits; + if (!range_for_column.get_size_in_bits (&size_in_bits)) + size_in_bits = BITS_PER_UNIT; // arbitrary non-zero value + gcc_assert (size_in_bits > 0); + bit_sizes[table_x] = size_in_bits; + } + else + bit_sizes[table_x] = 0; + } + + while (adjust_to_scale_once (bit_sizes)) + { + } + } + bool adjust_to_scale_once (const std::vector<bit_offset_t> &bit_sizes) + { + LOG_SCOPE (m_logger); + + const unsigned num_columns = m_btm.get_num_columns (); + + /* Find the total canvas width currently required. + Require one extra canvas column for the right-hand border + of the table. 
*/ + int total_width = 1; + for (unsigned table_x = 0; table_x < num_columns; table_x++) + { + int canvas_w = m_col_widths->m_requirements[table_x]; + gcc_assert (canvas_w >= 0); + total_width += canvas_w + 1; + } + /* Stop widening once the ideal canvas width has been reached. */ + const int max_width = param_analyzer_text_art_ideal_canvas_width; + if (total_width >= max_width) + { + if (m_logger) + m_logger->log ("bailing out: total_width=%i >= max_width (%i)\n", + total_width, max_width); + return false; + } + /* Canvas width per bit for each column, scaled by fixed_point before the integer division; columns with no bits get INT_MAX. */ + const int fixed_point = 1024; + std::vector<bit_offset_t> canvas_w_per_bit (num_columns); + for (unsigned table_x = 0; table_x < num_columns; table_x++) + { + bit_offset_t bit_size = bit_sizes[table_x]; + if (bit_size > 0) + canvas_w_per_bit[table_x] + = (m_col_widths->m_requirements[table_x] * fixed_point) / bit_size; + else + canvas_w_per_bit[table_x] = INT_MAX; + } + + /* Find the min canvas per bit, and give an extra canvas column to + the table column that has least. */ + size_t min_idx = std::distance (canvas_w_per_bit.begin (), + std::min_element (canvas_w_per_bit.begin (), + canvas_w_per_bit.end ())); + m_col_widths->m_requirements[min_idx] += 1; + if (m_logger) + m_logger->log ("adding 1 canvas_w to column %i\n", (int)min_idx); + + return true; // keep going + } + + const access_operation &m_op; + diagnostic_event_id_t m_region_creation_event_id; + style_manager &m_sm; + const theme &m_theme; + logger *m_logger; + /* In lieu of being able to throw exceptions, a flag to mark this object + as "invalid". */ + bool m_invalid; + + style::id_t m_valid_style_id; + style::id_t m_invalid_style_id; + + valid_region_spatial_item m_valid_region_spatial_item; + accessed_region_spatial_item m_accessed_region_spatial_item; + std::unique_ptr<svalue_spatial_item> m_svalue_spatial_item; + + std::unique_ptr<boundaries> m_boundaries; + + bit_to_table_map m_btm; + + bool m_calc_req_size_called; + + /* Column widths shared by all x_aligned_table_widget, + created once we know how many columns we need. 
*/ + std::unique_ptr<table_dimension_sizes> m_col_widths; + + /* All of the child x_aligned_table_widget that share + column widths. */ + std::vector<x_aligned_table_widget *> m_aligned_table_widgets; + +/* Mapping from table_x to canvas_x. */ + std::vector<int> m_col_start_x; +}; + +x_ruler +x_aligned_x_ruler_widget::make_x_ruler () const +{ + x_ruler r (x_ruler::label_dir::BELOW); + for (auto& iter : m_labels) + { + canvas::range_t canvas_x_range + = m_dia_impl.get_canvas_x_range (iter.m_table_x_range); + /* Include the end-point. */ + canvas_x_range.next++; + r.add_label (canvas_x_range, iter.m_text.copy (), iter.m_style_id, + x_ruler::label_kind::TEXT_WITH_BORDER); + } + return r; +} + +/* class direction_widget : public leaf_widget. */ + +/* Paint arrows indicating the direction of the access (read vs write), + but only in the X-extent corresponding to the region that's actually + accessed. */ + +void +direction_widget::paint_to_canvas (canvas &canvas) +{ + const access_range accessed_bits (m_dia_impl.get_op ().get_actual_bits ()); + + const access_range valid_bits (m_dia_impl.get_op ().get_valid_bits ()); + + for (unsigned table_x = 0; table_x < m_btm.get_num_columns (); table_x++) + { + access_range column_access_range; + if (m_btm.maybe_get_access_range_for_table_x (table_x, + &column_access_range)) + { + /* Only paint arrows in the accessed region. */ + if (!accessed_bits.contains_p (column_access_range)) + continue; + + /* Are we within the valid region? */ + const bool is_valid (valid_bits.contains_p (column_access_range)); + const style::id_t style_id + = m_dia_impl.get_style_id_for_validity (is_valid); + const canvas::range_t x_canvas_range + = m_dia_impl.get_canvas_x_range (table::range_t (table_x, + table_x + 1)); + const int canvas_x = x_canvas_range.get_midpoint (); + m_dia_impl.get_theme ().paint_y_arrow + (canvas, + canvas_x, + canvas::range_t (get_y_range ()), + (m_dia_impl.get_op ().m_dir == DIR_READ + ? 
theme::y_arrow_dir::UP + : theme::y_arrow_dir::DOWN), + style_id); + } + } +} + +/* class access_diagram : public text_art::wrapper_widget. */ + +/* To hide the implementation details, this is merely a wrapper around + an access_diagram_impl. */ + +access_diagram::access_diagram (const access_operation &op, + diagnostic_event_id_t region_creation_event_id, + style_manager &sm, + const theme &theme, + logger *logger) +: wrapper_widget (make_unique <access_diagram_impl> (op, + region_creation_event_id, + sm, + theme, + logger)) +{ +} + +} // namespace ana + +#endif /* #if ENABLE_ANALYZER */ diff --git a/gcc/analyzer/access-diagram.h b/gcc/analyzer/access-diagram.h new file mode 100644 index 0000000..c124e80 --- /dev/null +++ b/gcc/analyzer/access-diagram.h @@ -0,0 +1,165 @@ +/* Text art visualizations within -fanalyzer. + Copyright (C) 2023 Free Software Foundation, Inc. + Contributed by David Malcolm <dmalcolm@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +#ifndef GCC_ANALYZER_ACCESS_DIAGRAM_H +#define GCC_ANALYZER_ACCESS_DIAGRAM_H + +#include "text-art/canvas.h" +#include "text-art/theme.h" +#include "text-art/widget.h" +#include "analyzer/analyzer.h" +#include "analyzer/store.h" + +namespace ana { +/* A size in bits, held as a tree (NULL when default-constructed). */ +class bit_size_expr +{ +public: + bit_size_expr () : m_num_bits (NULL) {} + bit_size_expr (tree num_bits) : m_num_bits (num_bits) {} + + text_art::styled_string + get_formatted_str (text_art::style_manager &sm, + const char *concrete_single_bit_fmt, + const char *concrete_plural_bits_fmt, + const char *concrete_single_byte_fmt, + const char *concrete_plural_bytes_fmt, + const char *symbolic_bits_fmt, + const char *symbolic_bytes_fmt) const; + void print (pretty_printer *pp) const; + + tree maybe_get_as_bytes () const; + +private: + tree m_num_bits; +}; + +/* A range of bits within a base region, where each endpoint + could be concrete or symbolic (not necessarily the same). */ + +struct access_range +{ + access_range () + : m_start (), m_next () + { + } + access_range (region_offset start, region_offset next) + : m_start (start), m_next (next) + {} + access_range (const region *base_region, const bit_range &bits); + access_range (const region *base_region, const byte_range &bytes); + access_range (const region &reg, region_model_manager *); + + bool concrete_p () const + { + return m_start.concrete_p () && m_next.concrete_p (); + } + + bool empty_p () const; + + bool get_size (const region_model &model, bit_size_expr *out) const; + + bool get_size_in_bits (bit_size_t *out) const /* Writes *OUT only if both endpoints are concrete. */ + { + if (concrete_p ()) + { + *out = m_next.get_bit_offset () - m_start.get_bit_offset (); + return true; + } + return false; + } + + bool as_concrete_bit_range (bit_range *out) const + { + if (!concrete_p ()) + return false; + bit_size_t size = m_next.get_bit_offset () - m_start.get_bit_offset (); + *out = bit_range (m_start.get_bit_offset (), size); + return true; + } + bool as_concrete_byte_range (byte_range *out) const + { + bit_range 
bits (0, 0); + if (!as_concrete_bit_range (&bits)) + return false; + return bits.as_byte_range (out); + } + + bool contains_p (const access_range &other) const; + + void dump_to_pp (pretty_printer *pp, bool) const; + void dump (bool) const; + void log (const char *title, logger &) const; + + region_offset m_start; + region_offset m_next; +}; +/* A single access: the model, the direction, the region accessed and its base region, plus an svalue hint. */ +struct access_operation +{ + access_operation (const region_model &model, + enum access_direction dir, + const region &reg, + const svalue *sval_hint) + : m_model (model), + m_dir (dir), + m_reg (reg), + m_sval_hint (sval_hint), + m_base_region (reg.get_base_region ()) + {} + + region_model_manager *get_manager () const + { + return m_model.get_manager (); + } + + /* Get the valid bits to access within the base region. */ + access_range get_valid_bits () const; + + /* Get the actual bits accessed within the base region. */ + access_range get_actual_bits () const; + + bool maybe_get_invalid_before_bits (access_range *out) const; + bool maybe_get_invalid_after_bits (access_range *out) const; + + const region_model &m_model; + enum access_direction m_dir; + const region &m_reg; + const svalue *m_sval_hint; + const region *m_base_region; +}; + +class access_diagram : public text_art::wrapper_widget +{ +public: + access_diagram (const access_operation &op, + diagnostic_event_id_t region_creation_event_id, + text_art::style_manager &sm, + const text_art::theme &theme, + logger *logger); + const char *get_desc () const override + { + return "access_diagram"; + } +}; + +} // namespace ana + +#endif /* GCC_ANALYZER_ACCESS_DIAGRAM_H */ diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h index a161952..579517c 100644 --- a/gcc/analyzer/analyzer.h +++ b/gcc/analyzer/analyzer.h @@ -183,6 +183,11 @@ extern tree get_field_at_bit_offset (tree record_type, bit_offset_t bit_offset); class region_offset { public: + region_offset () + : m_base_region (NULL), m_offset (0), m_sym_offset (NULL) + { + } + static region_offset make_concrete 
(const region *base_region, bit_offset_t offset) { @@ -193,9 +198,12 @@ public: { return region_offset (base_region, 0, sym_offset); } + static region_offset make_byte_offset (const region *base_region, + const svalue *num_bytes_sval); const region *get_base_region () const { return m_base_region; } + bool concrete_p () const { return m_sym_offset == NULL; } bool symbolic_p () const { return m_sym_offset != NULL; } bit_offset_t get_bit_offset () const @@ -204,12 +212,26 @@ public: return m_offset; } + bool get_concrete_byte_offset (byte_offset_t *out) const + { + gcc_assert (!symbolic_p ()); + if (m_offset % BITS_PER_UNIT == 0) + { + *out = m_offset / BITS_PER_UNIT; + return true; + } + return false; + } + const svalue *get_symbolic_byte_offset () const { gcc_assert (symbolic_p ()); return m_sym_offset; } + tree calc_symbolic_bit_offset (const region_model &model) const; + const svalue *calc_symbolic_byte_offset (region_model_manager *mgr) const; + bool operator== (const region_offset &other) const { return (m_base_region == other.m_base_region @@ -217,6 +239,9 @@ public: && m_sym_offset == other.m_sym_offset); } + void dump_to_pp (pretty_printer *pp, bool) const; + void dump (bool) const; + private: region_offset (const region *base_region, bit_offset_t offset, const svalue *sym_offset) @@ -228,6 +253,11 @@ private: const svalue *m_sym_offset; }; +extern bool operator< (const region_offset &, const region_offset &); +extern bool operator<= (const region_offset &, const region_offset &); +extern bool operator> (const region_offset &, const region_offset &); +extern bool operator>= (const region_offset &, const region_offset &); + extern location_t get_stmt_location (const gimple *stmt, function *fun); extern bool compat_types_p (tree src_type, tree dst_type); diff --git a/gcc/analyzer/analyzer.opt b/gcc/analyzer/analyzer.opt index 9d1a937..2760aaa 100644 --- a/gcc/analyzer/analyzer.opt +++ b/gcc/analyzer/analyzer.opt @@ -54,6 +54,22 @@ The minimum number of 
supernodes within a function for the analyzer to consider Common Joined UInteger Var(param_analyzer_max_enodes_for_full_dump) Init(200) Param The maximum depth of exploded nodes that should appear in a dot dump before switching to a less verbose format. +-param=analyzer-text-art-string-ellipsis-threshold= +Common Joined UInteger Var(param_analyzer_text_art_string_ellipsis_threshold) Init(15) Param +The number of bytes at which to ellipsize string literals in analyzer text art diagrams. + +-param=analyzer-text-art-string-ellipsis-head-len= +Common Joined UInteger Var(param_analyzer_text_art_string_ellipsis_head_len) Init(6) Param +The number of literal bytes to show at the head of a string literal in text art when ellipsizing it. + +-param=analyzer-text-art-string-ellipsis-tail-len= +Common Joined UInteger Var(param_analyzer_text_art_string_ellipsis_tail_len) Init(6) Param +The number of literal bytes to show at the tail of a string literal in text art when ellipsizing it. + +-param=analyzer-text-art-ideal-canvas-width= +Common Joined UInteger Var(param_analyzer_text_art_ideal_canvas_width) Init(72) Param +The ideal width in characters of text art diagrams generated by the analyzer. + Wanalyzer-allocation-size Common Var(warn_analyzer_allocation_size) Init(1) Warning Warn about code paths in which a pointer to a buffer is assigned to an incompatible type. @@ -242,6 +258,10 @@ fanalyzer-checker= Common Joined RejectNegative Var(flag_analyzer_checker) Restrict the analyzer to run just the named checker. +fanalyzer-debug-text-art +Common Var(flag_analyzer_debug_text_art) Init(0) +Add extra annotations to diagrams. + fanalyzer-fine-grained Common Var(flag_analyzer_fine_grained) Init(0) Avoid combining multiple statements into one exploded edge. 
diff --git a/gcc/analyzer/bounds-checking.cc b/gcc/analyzer/bounds-checking.cc index a5692cf..10632d1 100644 --- a/gcc/analyzer/bounds-checking.cc +++ b/gcc/analyzer/bounds-checking.cc @@ -25,15 +25,18 @@ along with GCC; see the file COPYING3. If not see #include "tree.h" #include "function.h" #include "basic-block.h" +#include "intl.h" #include "gimple.h" #include "gimple-iterator.h" #include "diagnostic-core.h" #include "diagnostic-metadata.h" +#include "diagnostic-diagram.h" #include "analyzer/analyzer.h" #include "analyzer/analyzer-logging.h" #include "analyzer/region-model.h" #include "analyzer/checker-event.h" #include "analyzer/checker-path.h" +#include "analyzer/access-diagram.h" #if ENABLE_ANALYZER @@ -44,8 +47,35 @@ namespace ana { class out_of_bounds : public pending_diagnostic { public: - out_of_bounds (const region *reg, tree diag_arg) - : m_reg (reg), m_diag_arg (diag_arg) + class oob_region_creation_event_capacity : public region_creation_event_capacity + { + public: + oob_region_creation_event_capacity (tree capacity, + const event_loc_info &loc_info, + out_of_bounds &oob) + : region_creation_event_capacity (capacity, + loc_info), + m_oob (oob) + { + } + void prepare_for_emission (checker_path *path, + pending_diagnostic *pd, + diagnostic_event_id_t emission_id) override + { + region_creation_event_capacity::prepare_for_emission (path, + pd, + emission_id); + m_oob.m_region_creation_event_id = emission_id; + } + private: + out_of_bounds &m_oob; + }; + + out_of_bounds (const region_model &model, + const region *reg, + tree diag_arg, + const svalue *sval_hint) + : m_model (model), m_reg (reg), m_diag_arg (diag_arg), m_sval_hint (sval_hint) {} bool subclass_equal_p (const pending_diagnostic &base_other) const override @@ -63,7 +93,7 @@ public: void mark_interesting_stuff (interesting_t *interest) final override { - interest->add_region_creation (m_reg); + interest->add_region_creation (m_reg->get_base_region ()); } void add_region_creation_events 
(const region *, @@ -75,15 +105,25 @@ public: so we don't need an event for that. */ if (capacity) emission_path.add_event - (make_unique<region_creation_event_capacity> (capacity, loc_info)); + (make_unique<oob_region_creation_event_capacity> (capacity, loc_info, + *this)); } + virtual enum access_direction get_dir () const = 0; + protected: enum memory_space get_memory_space () const { return m_reg->get_memory_space (); } + void + maybe_show_notes (location_t loc, logger *logger) const + { + maybe_describe_array_bounds (loc); + maybe_show_diagram (logger); + } + /* Potentially add a note about valid ways to index this array, such as (given "int arr[10];"): note: valid subscripts for 'arr' are '[0]' to '[9]' @@ -112,8 +152,49 @@ protected: m_diag_arg, min_idx, max_idx); } + void + maybe_show_diagram (logger *logger) const + { + access_operation op (m_model, get_dir (), *m_reg, m_sval_hint); + + /* Don't attempt to make a diagram if there's no valid way of + accessing the base region (e.g. a 0-element array). */ + if (op.get_valid_bits ().empty_p ()) + return; + + if (const text_art::theme *theme = global_dc->m_diagrams.m_theme) + { + text_art::style_manager sm; + text_art::canvas canvas (make_access_diagram (op, sm, *theme, logger)); + if (canvas.get_size ().w == 0 && canvas.get_size ().h == 0) + { + /* In lieu of exceptions, return a zero-sized diagram if there's + a problem. Give up if that's happened. */ + return; + } + diagnostic_diagram diagram + (canvas, + /* Alt text. 
*/ + _("Diagram visualizing the predicted out-of-bounds access")); + diagnostic_emit_diagram (global_dc, diagram); + } + } + + text_art::canvas + make_access_diagram (const access_operation &op, + text_art::style_manager &sm, + const text_art::theme &theme, + logger *logger) const + { + access_diagram d (op, m_region_creation_event_id, sm, theme, logger); + return d.to_canvas (sm); + } + + region_model m_model; const region *m_reg; tree m_diag_arg; + const svalue *m_sval_hint; + diagnostic_event_id_t m_region_creation_event_id; }; /* Abstract base class for all out-of-bounds warnings where the @@ -122,9 +203,11 @@ protected: class concrete_out_of_bounds : public out_of_bounds { public: - concrete_out_of_bounds (const region *reg, tree diag_arg, - byte_range out_of_bounds_range) - : out_of_bounds (reg, diag_arg), + concrete_out_of_bounds (const region_model &model, + const region *reg, tree diag_arg, + byte_range out_of_bounds_range, + const svalue *sval_hint) + : out_of_bounds (model, reg, diag_arg, sval_hint), m_out_of_bounds_range (out_of_bounds_range) {} @@ -146,9 +229,12 @@ protected: class concrete_past_the_end : public concrete_out_of_bounds { public: - concrete_past_the_end (const region *reg, tree diag_arg, byte_range range, - tree byte_bound) - : concrete_out_of_bounds (reg, diag_arg, range), m_byte_bound (byte_bound) + concrete_past_the_end (const region_model &model, + const region *reg, tree diag_arg, byte_range range, + tree byte_bound, + const svalue *sval_hint) + : concrete_out_of_bounds (model, reg, diag_arg, range, sval_hint), + m_byte_bound (byte_bound) {} bool @@ -168,7 +254,9 @@ public: { if (m_byte_bound && TREE_CODE (m_byte_bound) == INTEGER_CST) emission_path.add_event - (make_unique<region_creation_event_capacity> (m_byte_bound, loc_info)); + (make_unique<oob_region_creation_event_capacity> (m_byte_bound, + loc_info, + *this)); } protected: @@ -180,9 +268,11 @@ protected: class concrete_buffer_overflow : public concrete_past_the_end { 
public: - concrete_buffer_overflow (const region *reg, tree diag_arg, - byte_range range, tree byte_bound) - : concrete_past_the_end (reg, diag_arg, range, byte_bound) + concrete_buffer_overflow (const region_model &model, + const region *reg, tree diag_arg, + byte_range range, tree byte_bound, + const svalue *sval_hint) + : concrete_past_the_end (model, reg, diag_arg, range, byte_bound, sval_hint) {} const char *get_kind () const final override @@ -190,7 +280,8 @@ public: return "concrete_buffer_overflow"; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, + logger *logger) final override { diagnostic_metadata m; bool warned; @@ -238,7 +329,7 @@ public: "write to beyond the end of %qE", m_diag_arg); - maybe_describe_array_bounds (rich_loc->get_loc ()); + maybe_show_notes (rich_loc->get_loc (), logger); } return warned; @@ -276,6 +367,8 @@ public: start_buf, end_buf, m_byte_bound); } } + + enum access_direction get_dir () const final override { return DIR_WRITE; } }; /* Concrete subclass to complain about buffer over-reads. 
*/ @@ -283,9 +376,10 @@ public: class concrete_buffer_over_read : public concrete_past_the_end { public: - concrete_buffer_over_read (const region *reg, tree diag_arg, + concrete_buffer_over_read (const region_model &model, + const region *reg, tree diag_arg, byte_range range, tree byte_bound) - : concrete_past_the_end (reg, diag_arg, range, byte_bound) + : concrete_past_the_end (model, reg, diag_arg, range, byte_bound, NULL) {} const char *get_kind () const final override @@ -293,7 +387,7 @@ public: return "concrete_buffer_over_read"; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *logger) final override { diagnostic_metadata m; bool warned; @@ -339,7 +433,7 @@ public: "read from after the end of %qE", m_diag_arg); - maybe_describe_array_bounds (rich_loc->get_loc ()); + maybe_show_notes (rich_loc->get_loc (), logger); } return warned; @@ -377,6 +471,8 @@ public: start_buf, end_buf, m_byte_bound); } } + + enum access_direction get_dir () const final override { return DIR_READ; } }; /* Concrete subclass to complain about buffer underwrites. 
*/ @@ -384,9 +480,11 @@ public: class concrete_buffer_underwrite : public concrete_out_of_bounds { public: - concrete_buffer_underwrite (const region *reg, tree diag_arg, - byte_range range) - : concrete_out_of_bounds (reg, diag_arg, range) + concrete_buffer_underwrite (const region_model &model, + const region *reg, tree diag_arg, + byte_range range, + const svalue *sval_hint) + : concrete_out_of_bounds (model, reg, diag_arg, range, sval_hint) {} const char *get_kind () const final override @@ -394,7 +492,7 @@ public: return "concrete_buffer_underwrite"; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *logger) final override { diagnostic_metadata m; bool warned; @@ -415,7 +513,7 @@ public: break; } if (warned) - maybe_describe_array_bounds (rich_loc->get_loc ()); + maybe_show_notes (rich_loc->get_loc (), logger); return warned; } @@ -449,6 +547,8 @@ public: start_buf, end_buf);; } } + + enum access_direction get_dir () const final override { return DIR_WRITE; } }; /* Concrete subclass to complain about buffer under-reads. 
*/ @@ -456,9 +556,10 @@ public: class concrete_buffer_under_read : public concrete_out_of_bounds { public: - concrete_buffer_under_read (const region *reg, tree diag_arg, + concrete_buffer_under_read (const region_model &model, + const region *reg, tree diag_arg, byte_range range) - : concrete_out_of_bounds (reg, diag_arg, range) + : concrete_out_of_bounds (model, reg, diag_arg, range, NULL) {} const char *get_kind () const final override @@ -466,7 +567,7 @@ public: return "concrete_buffer_under_read"; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *logger) final override { diagnostic_metadata m; bool warned; @@ -487,7 +588,7 @@ public: break; } if (warned) - maybe_describe_array_bounds (rich_loc->get_loc ()); + maybe_show_notes (rich_loc->get_loc (), logger); return warned; } @@ -521,6 +622,8 @@ public: start_buf, end_buf);; } } + + enum access_direction get_dir () const final override { return DIR_READ; } }; /* Abstract class to complain about out-of-bounds read/writes where @@ -529,9 +632,11 @@ public: class symbolic_past_the_end : public out_of_bounds { public: - symbolic_past_the_end (const region *reg, tree diag_arg, tree offset, - tree num_bytes, tree capacity) - : out_of_bounds (reg, diag_arg), + symbolic_past_the_end (const region_model &model, + const region *reg, tree diag_arg, tree offset, + tree num_bytes, tree capacity, + const svalue *sval_hint) + : out_of_bounds (model, reg, diag_arg, sval_hint), m_offset (offset), m_num_bytes (num_bytes), m_capacity (capacity) @@ -559,9 +664,12 @@ protected: class symbolic_buffer_overflow : public symbolic_past_the_end { public: - symbolic_buffer_overflow (const region *reg, tree diag_arg, tree offset, - tree num_bytes, tree capacity) - : symbolic_past_the_end (reg, diag_arg, offset, num_bytes, capacity) + symbolic_buffer_overflow (const region_model &model, + const region *reg, tree diag_arg, tree offset, + tree num_bytes, tree capacity, + const svalue 
*sval_hint) + : symbolic_past_the_end (model, reg, diag_arg, offset, num_bytes, capacity, + sval_hint) { } @@ -570,24 +678,31 @@ public: return "symbolic_buffer_overflow"; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *logger) final override { diagnostic_metadata m; + bool warned; switch (get_memory_space ()) { default: m.add_cwe (787); - return warning_meta (rich_loc, m, get_controlling_option (), - "buffer overflow"); + warned = warning_meta (rich_loc, m, get_controlling_option (), + "buffer overflow"); + break; case MEMSPACE_STACK: m.add_cwe (121); - return warning_meta (rich_loc, m, get_controlling_option (), - "stack-based buffer overflow"); + warned = warning_meta (rich_loc, m, get_controlling_option (), + "stack-based buffer overflow"); + break; case MEMSPACE_HEAP: m.add_cwe (122); - return warning_meta (rich_loc, m, get_controlling_option (), - "heap-based buffer overflow"); + warned = warning_meta (rich_loc, m, get_controlling_option (), + "heap-based buffer overflow"); + break; } + if (warned) + maybe_show_notes (rich_loc->get_loc (), logger); + return warned; } label_text @@ -658,6 +773,8 @@ public: m_diag_arg); return ev.formatted_print ("out-of-bounds write"); } + + enum access_direction get_dir () const final override { return DIR_WRITE; } }; /* Concrete subclass to complain about over-reads with symbolic values. 
*/ @@ -665,9 +782,11 @@ public: class symbolic_buffer_over_read : public symbolic_past_the_end { public: - symbolic_buffer_over_read (const region *reg, tree diag_arg, tree offset, + symbolic_buffer_over_read (const region_model &model, + const region *reg, tree diag_arg, tree offset, tree num_bytes, tree capacity) - : symbolic_past_the_end (reg, diag_arg, offset, num_bytes, capacity) + : symbolic_past_the_end (model, reg, diag_arg, offset, num_bytes, capacity, + NULL) { } @@ -676,25 +795,32 @@ public: return "symbolic_buffer_over_read"; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *logger) final override { diagnostic_metadata m; m.add_cwe (126); + bool warned; switch (get_memory_space ()) { default: m.add_cwe (787); - return warning_meta (rich_loc, m, get_controlling_option (), - "buffer over-read"); + warned = warning_meta (rich_loc, m, get_controlling_option (), + "buffer over-read"); + break; case MEMSPACE_STACK: m.add_cwe (121); - return warning_meta (rich_loc, m, get_controlling_option (), - "stack-based buffer over-read"); + warned = warning_meta (rich_loc, m, get_controlling_option (), + "stack-based buffer over-read"); + break; case MEMSPACE_HEAP: m.add_cwe (122); - return warning_meta (rich_loc, m, get_controlling_option (), - "heap-based buffer over-read"); + warned = warning_meta (rich_loc, m, get_controlling_option (), + "heap-based buffer over-read"); + break; } + if (warned) + maybe_show_notes (rich_loc->get_loc (), logger); + return warned; } label_text @@ -765,6 +891,8 @@ public: m_diag_arg); return ev.formatted_print ("out-of-bounds read"); } + + enum access_direction get_dir () const final override { return DIR_READ; } }; /* Check whether an access is past the end of the BASE_REG. 
@@ -776,6 +904,7 @@ region_model::check_symbolic_bounds (const region *base_reg, const svalue *num_bytes_sval, const svalue *capacity, enum access_direction dir, + const svalue *sval_hint, region_model_context *ctxt) const { gcc_assert (ctxt); @@ -790,13 +919,21 @@ region_model::check_symbolic_bounds (const region *base_reg, tree offset_tree = get_representative_tree (sym_byte_offset); tree num_bytes_tree = get_representative_tree (num_bytes_sval); tree capacity_tree = get_representative_tree (capacity); + const region *offset_reg = m_mgr->get_offset_region (base_reg, + NULL_TREE, + sym_byte_offset); + const region *sized_offset_reg = m_mgr->get_sized_region (offset_reg, + NULL_TREE, + num_bytes_sval); switch (dir) { default: gcc_unreachable (); break; case DIR_READ: - ctxt->warn (make_unique<symbolic_buffer_over_read> (base_reg, + gcc_assert (sval_hint == nullptr); + ctxt->warn (make_unique<symbolic_buffer_over_read> (*this, + sized_offset_reg, diag_arg, offset_tree, num_bytes_tree, @@ -804,11 +941,13 @@ region_model::check_symbolic_bounds (const region *base_reg, return false; break; case DIR_WRITE: - ctxt->warn (make_unique<symbolic_buffer_overflow> (base_reg, + ctxt->warn (make_unique<symbolic_buffer_overflow> (*this, + sized_offset_reg, diag_arg, offset_tree, num_bytes_tree, - capacity_tree)); + capacity_tree, + sval_hint)); return false; break; } @@ -832,6 +971,7 @@ maybe_get_integer_cst_tree (const svalue *sval) bool region_model::check_region_bounds (const region *reg, enum access_direction dir, + const svalue *sval_hint, region_model_context *ctxt) const { gcc_assert (ctxt); @@ -882,8 +1022,8 @@ region_model::check_region_bounds (const region *reg, } else byte_offset_sval = reg_offset.get_symbolic_byte_offset (); - return check_symbolic_bounds (base_reg, byte_offset_sval, num_bytes_sval, - capacity, dir, ctxt); + return check_symbolic_bounds (base_reg, byte_offset_sval, num_bytes_sval, + capacity, dir, sval_hint, ctxt); } /* Otherwise continue to check 
with concrete values. */ @@ -902,13 +1042,17 @@ region_model::check_region_bounds (const region *reg, gcc_unreachable (); break; case DIR_READ: - ctxt->warn (make_unique<concrete_buffer_under_read> (reg, diag_arg, + gcc_assert (sval_hint == nullptr); + ctxt->warn (make_unique<concrete_buffer_under_read> (*this, reg, + diag_arg, out)); oob_safe = false; break; case DIR_WRITE: - ctxt->warn (make_unique<concrete_buffer_underwrite> (reg, diag_arg, - out)); + ctxt->warn (make_unique<concrete_buffer_underwrite> (*this, + reg, diag_arg, + out, + sval_hint)); oob_safe = false; break; } @@ -934,13 +1078,17 @@ region_model::check_region_bounds (const region *reg, gcc_unreachable (); break; case DIR_READ: - ctxt->warn (make_unique<concrete_buffer_over_read> (reg, diag_arg, + gcc_assert (sval_hint == nullptr); + ctxt->warn (make_unique<concrete_buffer_over_read> (*this, + reg, diag_arg, out, byte_bound)); oob_safe = false; break; case DIR_WRITE: - ctxt->warn (make_unique<concrete_buffer_overflow> (reg, diag_arg, - out, byte_bound)); + ctxt->warn (make_unique<concrete_buffer_overflow> (*this, + reg, diag_arg, + out, byte_bound, + sval_hint)); oob_safe = false; break; } diff --git a/gcc/analyzer/diagnostic-manager.cc b/gcc/analyzer/diagnostic-manager.cc index 0a447f7..cfca305 100644 --- a/gcc/analyzer/diagnostic-manager.cc +++ b/gcc/analyzer/diagnostic-manager.cc @@ -1421,7 +1421,7 @@ diagnostic_manager::emit_saved_diagnostic (const exploded_graph &eg, auto_diagnostic_group d; auto_cfun sentinel (sd.m_snode->m_fun); - if (sd.m_d->emit (&rich_loc)) + if (sd.m_d->emit (&rich_loc, get_logger ())) { sd.emit_any_notes (); diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index a5965c2..61685f4 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -1771,7 +1771,7 @@ public: return OPT_Wanalyzer_stale_setjmp_buffer; } - bool emit (rich_location *richloc) final override + bool emit (rich_location *richloc, logger *) final override { return warning_at (richloc, 
get_controlling_option (), @@ -3925,7 +3925,7 @@ public: return OPT_Wanalyzer_jump_through_null; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { return warning_at (rich_loc, get_controlling_option (), "jump through null pointer"); diff --git a/gcc/analyzer/infinite-recursion.cc b/gcc/analyzer/infinite-recursion.cc index c262e39..3ba316e 100644 --- a/gcc/analyzer/infinite-recursion.cc +++ b/gcc/analyzer/infinite-recursion.cc @@ -95,7 +95,7 @@ public: return OPT_Wanalyzer_infinite_recursion; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { /* "CWE-674: Uncontrolled Recursion". */ diagnostic_metadata m; diff --git a/gcc/analyzer/kf-analyzer.cc b/gcc/analyzer/kf-analyzer.cc index 32aa87b..5aed007 100644 --- a/gcc/analyzer/kf-analyzer.cc +++ b/gcc/analyzer/kf-analyzer.cc @@ -255,7 +255,7 @@ public: return 0; } - bool emit (rich_location *richloc) final override + bool emit (rich_location *richloc, logger *) final override { inform (richloc, "path"); return true; diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc index 1044111..3e319a0 100644 --- a/gcc/analyzer/kf.cc +++ b/gcc/analyzer/kf.cc @@ -553,7 +553,9 @@ kf_memset::impl_call_pre (const call_details &cd) const const region *sized_dest_reg = mgr->get_sized_region (dest_reg, NULL_TREE, num_bytes_sval); - model->check_region_for_write (sized_dest_reg, cd.get_ctxt ()); + model->check_region_for_write (sized_dest_reg, + nullptr, + cd.get_ctxt ()); model->fill_region (sized_dest_reg, fill_value_u8); } @@ -587,7 +589,7 @@ public: return OPT_Wanalyzer_putenv_of_auto_var; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { auto_diagnostic_group d; diagnostic_metadata m; diff --git a/gcc/analyzer/pending-diagnostic.h b/gcc/analyzer/pending-diagnostic.h index 6423c8b..7582b37 100644 --- 
a/gcc/analyzer/pending-diagnostic.h +++ b/gcc/analyzer/pending-diagnostic.h @@ -180,7 +180,7 @@ class pending_diagnostic /* Vfunc for emitting the diagnostic. The rich_location will have been populated with a diagnostic_path. Return true if a diagnostic is actually emitted. */ - virtual bool emit (rich_location *) = 0; + virtual bool emit (rich_location *, logger *) = 0; /* Hand-coded RTTI: get an ID for the subclass. */ virtual const char *get_kind () const = 0; diff --git a/gcc/analyzer/region-model-manager.cc b/gcc/analyzer/region-model-manager.cc index 3b95e43..1453acf 100644 --- a/gcc/analyzer/region-model-manager.cc +++ b/gcc/analyzer/region-model-manager.cc @@ -230,10 +230,11 @@ region_model_manager::get_or_create_constant_svalue (tree cst_expr) for VAL of type TYPE, creating it if necessary. */ const svalue * -region_model_manager::get_or_create_int_cst (tree type, poly_int64 val) +region_model_manager::get_or_create_int_cst (tree type, + const poly_wide_int_ref &cst) { gcc_assert (type); - tree tree_cst = build_int_cst (type, val); + tree tree_cst = wide_int_to_tree (type, cst); return get_or_create_constant_svalue (tree_cst); } @@ -612,7 +613,7 @@ region_model_manager::maybe_fold_binop (tree type, enum tree_code op, return get_or_create_constant_svalue (result); } - if (FLOAT_TYPE_P (type) + if ((type && FLOAT_TYPE_P (type)) || (arg0->get_type () && FLOAT_TYPE_P (arg0->get_type ())) || (arg1->get_type () && FLOAT_TYPE_P (arg1->get_type ()))) return NULL; @@ -634,6 +635,11 @@ region_model_manager::maybe_fold_binop (tree type, enum tree_code op, /* (0 - VAL) -> -VAL. */ if (cst0 && zerop (cst0)) return get_or_create_unaryop (type, NEGATE_EXPR, arg1); + /* (X + Y) - X -> Y. */ + if (const binop_svalue *binop = arg0->dyn_cast_binop_svalue ()) + if (binop->get_op () == PLUS_EXPR) + if (binop->get_arg0 () == arg1) + return get_or_create_cast (type, binop->get_arg1 ()); break; case MULT_EXPR: /* (VAL * 0). 
*/ @@ -726,10 +732,7 @@ region_model_manager::maybe_fold_binop (tree type, enum tree_code op, if (cst1 && associative_tree_code (op)) if (const binop_svalue *binop = arg0->dyn_cast_binop_svalue ()) if (binop->get_op () == op - && binop->get_arg1 ()->maybe_get_constant () - && type == binop->get_type () - && type == binop->get_arg0 ()->get_type () - && type == binop->get_arg1 ()->get_type ()) + && binop->get_arg1 ()->maybe_get_constant ()) return get_or_create_binop (type, op, binop->get_arg0 (), get_or_create_binop (type, op, @@ -748,6 +751,21 @@ region_model_manager::maybe_fold_binop (tree type, enum tree_code op, get_or_create_binop (size_type_node, op, binop->get_arg1 (), arg1)); + /* Distribute multiplication by a constant through addition/subtraction: + (X + Y) * CST => (X * CST) + (Y * CST). */ + if (cst1 && op == MULT_EXPR) + if (const binop_svalue *binop = arg0->dyn_cast_binop_svalue ()) + if (binop->get_op () == PLUS_EXPR + || binop->get_op () == MINUS_EXPR) + { + return get_or_create_binop + (type, binop->get_op (), + get_or_create_binop (type, op, + binop->get_arg0 (), arg1), + get_or_create_binop (type, op, + binop->get_arg1 (), arg1)); + } + /* etc. */ return NULL; diff --git a/gcc/analyzer/region-model-manager.h b/gcc/analyzer/region-model-manager.h index 273fe7b..3340c3e 100644 --- a/gcc/analyzer/region-model-manager.h +++ b/gcc/analyzer/region-model-manager.h @@ -42,7 +42,7 @@ public: /* svalue consolidation. 
*/ const svalue *get_or_create_constant_svalue (tree cst_expr); - const svalue *get_or_create_int_cst (tree type, poly_int64); + const svalue *get_or_create_int_cst (tree type, const poly_wide_int_ref &cst); const svalue *get_or_create_null_ptr (tree pointer_type); const svalue *get_or_create_unknown_svalue (tree type); const svalue *get_or_create_setjmp_svalue (const setjmp_record &r, diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 598196a..6bc60f8 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -507,7 +507,7 @@ public: bool terminate_path_p () const final override { return true; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { switch (m_pkind) { @@ -638,7 +638,7 @@ public: return OPT_Wanalyzer_shift_count_negative; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { return warning_at (rich_loc, get_controlling_option (), "shift by negative count (%qE)", m_count_cst); @@ -685,7 +685,7 @@ public: return OPT_Wanalyzer_shift_count_overflow; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { return warning_at (rich_loc, get_controlling_option (), "shift by count (%qE) >= precision of type (%qi)", @@ -1736,7 +1736,7 @@ check_external_function_for_access_attr (const gcall *call, tree ptr_tree = gimple_call_arg (call, access->ptrarg); const svalue *ptr_sval = get_rvalue (ptr_tree, &my_ctxt); const region *reg = deref_rvalue (ptr_sval, ptr_tree, &my_ctxt); - check_region_for_write (reg, &my_ctxt); + check_region_for_write (reg, nullptr, &my_ctxt); /* We don't use the size arg for now. 
*/ } } @@ -2522,8 +2522,8 @@ region_model::deref_rvalue (const svalue *ptr_sval, tree ptr_tree, const poisoned_svalue *poisoned_sval = as_a <const poisoned_svalue *> (ptr_sval); enum poison_kind pkind = poisoned_sval->get_poison_kind (); - ctxt->warn (make_unique<poisoned_value_diagnostic> - (ptr, pkind, NULL, NULL)); + ctxt->warn (::make_unique<poisoned_value_diagnostic> + (ptr, pkind, nullptr, nullptr)); } } } @@ -2576,7 +2576,7 @@ public: return OPT_Wanalyzer_write_to_const; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { auto_diagnostic_group d; bool warned; @@ -2644,7 +2644,7 @@ public: return OPT_Wanalyzer_write_to_string_literal; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { return warning_at (rich_loc, get_controlling_option (), "write to string literal"); @@ -2742,6 +2742,15 @@ region_model::get_capacity (const region *reg) const /* Look through sized regions to get at the capacity of the underlying regions. */ return get_capacity (reg->get_parent_region ()); + case RK_STRING: + { + /* "Capacity" here means "size". */ + const string_region *string_reg = as_a <const string_region *> (reg); + tree string_cst = string_reg->get_string_cst (); + return m_mgr->get_or_create_int_cst (size_type_node, + TREE_STRING_LENGTH (string_cst)); + } + break; } if (const svalue *recorded = get_dynamic_extents (reg)) @@ -2781,11 +2790,14 @@ region_model::get_string_size (const region *reg) const /* If CTXT is non-NULL, use it to warn about any problems accessing REG, using DIR to determine if this access is a read or write. - Return TRUE if an UNKNOWN_SVALUE needs be created. */ + Return TRUE if an UNKNOWN_SVALUE needs be created. + If SVAL_HINT is non-NULL, use it as a hint in diagnostics + about the value that would be written to REG. 
*/ bool region_model::check_region_access (const region *reg, enum access_direction dir, + const svalue *sval_hint, region_model_context *ctxt) const { /* Fail gracefully if CTXT is NULL. */ @@ -2794,7 +2806,7 @@ region_model::check_region_access (const region *reg, bool need_unknown_sval = false; check_region_for_taint (reg, dir, ctxt); - if (!check_region_bounds (reg, dir, ctxt)) + if (!check_region_bounds (reg, dir, sval_hint, ctxt)) need_unknown_sval = true; switch (dir) @@ -2815,9 +2827,10 @@ region_model::check_region_access (const region *reg, void region_model::check_region_for_write (const region *dest_reg, + const svalue *sval_hint, region_model_context *ctxt) const { - check_region_access (dest_reg, DIR_WRITE, ctxt); + check_region_access (dest_reg, DIR_WRITE, sval_hint, ctxt); } /* If CTXT is non-NULL, use it to warn about any problems reading from REG. @@ -2827,7 +2840,7 @@ bool region_model::check_region_for_read (const region *src_reg, region_model_context *ctxt) const { - return check_region_access (src_reg, DIR_READ, ctxt); + return check_region_access (src_reg, DIR_READ, NULL, ctxt); } /* Concrete subclass for casts of pointers that lead to trailing bytes. */ @@ -2863,7 +2876,7 @@ public: return OPT_Wanalyzer_allocation_size; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { diagnostic_metadata m; m.add_cwe (131); @@ -3203,7 +3216,7 @@ region_model::set_value (const region *lhs_reg, const svalue *rhs_sval, check_region_size (lhs_reg, rhs_sval, ctxt); - check_region_for_write (lhs_reg, ctxt); + check_region_for_write (lhs_reg, rhs_sval, ctxt); m_store.set_value (m_mgr->get_store_manager(), lhs_reg, rhs_sval, ctxt ? ctxt->get_uncertainty () : NULL); @@ -3836,7 +3849,12 @@ region_model::get_representative_path_var_1 (const svalue *sval, /* Prevent infinite recursion. 
*/ if (visited->contains (sval)) - return path_var (NULL_TREE, 0); + { + if (sval->get_kind () == SK_CONSTANT) + return path_var (sval->maybe_get_constant (), 0); + else + return path_var (NULL_TREE, 0); + } visited->add (sval); /* Handle casts by recursion into get_representative_path_var. */ @@ -4941,7 +4959,7 @@ public: return same_tree_p (m_arg, ((const float_as_size_arg &) other).m_arg); } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { diagnostic_metadata m; bool warned = warning_meta (rich_loc, m, get_controlling_option (), @@ -5303,7 +5321,7 @@ public: return OPT_Wanalyzer_exposure_through_uninit_copy; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { diagnostic_metadata m; /* CWE-200: Exposure of Sensitive Information to an Unauthorized Actor. */ diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index 12f84b2..d6d9615 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -490,6 +490,7 @@ class region_model region_model_context *ctxt) const; void check_region_for_write (const region *dest_reg, + const svalue *sval_hint, region_model_context *ctxt) const; private: @@ -555,6 +556,7 @@ private: region_model_context *ctxt) const; bool check_region_access (const region *reg, enum access_direction dir, + const svalue *sval_hint, region_model_context *ctxt) const; bool check_region_for_read (const region *src_reg, region_model_context *ctxt) const; @@ -567,8 +569,10 @@ private: const svalue *num_bytes_sval, const svalue *capacity, enum access_direction dir, + const svalue *sval_hint, region_model_context *ctxt) const; bool check_region_bounds (const region *reg, enum access_direction dir, + const svalue *sval_hint, region_model_context *ctxt) const; void check_call_args (const call_details &cd) const; diff --git a/gcc/analyzer/region.cc b/gcc/analyzer/region.cc index 
098b436..62ae0b2 100644 --- a/gcc/analyzer/region.cc +++ b/gcc/analyzer/region.cc @@ -63,6 +63,332 @@ along with GCC; see the file COPYING3. If not see namespace ana { +region_offset +region_offset::make_byte_offset (const region *base_region, + const svalue *num_bytes_sval) +{ + if (tree num_bytes_cst = num_bytes_sval->maybe_get_constant ()) + { + gcc_assert (TREE_CODE (num_bytes_cst) == INTEGER_CST); + bit_offset_t num_bits = wi::to_offset (num_bytes_cst) * BITS_PER_UNIT; + return make_concrete (base_region, num_bits); + } + else + { + return make_symbolic (base_region, num_bytes_sval); + } +} + +tree +region_offset::calc_symbolic_bit_offset (const region_model &model) const +{ + if (symbolic_p ()) + { + tree num_bytes_expr = model.get_representative_tree (m_sym_offset); + if (!num_bytes_expr) + return NULL_TREE; + tree bytes_to_bits_scale = build_int_cst (size_type_node, BITS_PER_UNIT); + return fold_build2 (MULT_EXPR, size_type_node, + num_bytes_expr, bytes_to_bits_scale); + } + else + { + tree cst = wide_int_to_tree (size_type_node, m_offset); + return cst; + } +} + +const svalue * +region_offset::calc_symbolic_byte_offset (region_model_manager *mgr) const +{ + if (symbolic_p ()) + return m_sym_offset; + else + { + byte_offset_t concrete_byte_offset; + if (get_concrete_byte_offset (&concrete_byte_offset)) + return mgr->get_or_create_int_cst (size_type_node, + concrete_byte_offset); + else + /* Can't handle bitfields; return UNKNOWN. */ + return mgr->get_or_create_unknown_svalue (size_type_node); + } +} + +void +region_offset::dump_to_pp (pretty_printer *pp, bool simple) const +{ + if (symbolic_p ()) + { + /* We don't bother showing the base region. 
*/ + pp_string (pp, "byte "); + m_sym_offset->dump_to_pp (pp, simple); + } + else + { + if (m_offset % BITS_PER_UNIT == 0) + { + pp_string (pp, "byte "); + pp_wide_int (pp, m_offset / BITS_PER_UNIT, SIGNED); + } + else + { + pp_string (pp, "bit "); + pp_wide_int (pp, m_offset, SIGNED); + } + } +} + +DEBUG_FUNCTION void +region_offset::dump (bool simple) const +{ + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + pp_show_color (&pp) = pp_show_color (global_dc->printer); + pp.buffer->stream = stderr; + dump_to_pp (&pp, simple); + pp_newline (&pp); + pp_flush (&pp); +} + +/* An svalue that matches the pattern (BASE * FACTOR) + OFFSET + where FACTOR or OFFSET could be the identity (represented as NULL). */ + +struct linear_op +{ + linear_op (const svalue *base, + const svalue *factor, + const svalue *offset) + : m_base (base), m_factor (factor), m_offset (offset) + { + } + + bool maybe_get_cst_factor (bit_offset_t *out) const + { + if (m_factor == nullptr) + { + *out = 1; + return true; + } + if (tree cst_factor = m_factor->maybe_get_constant ()) + { + *out = wi::to_offset (cst_factor); + return true; + } + return false; + } + + bool maybe_get_cst_offset (bit_offset_t *out) const + { + if (m_offset == nullptr) + { + *out = 0; + return true; + } + if (tree cst_offset = m_offset->maybe_get_constant ()) + { + *out = wi::to_offset (cst_offset); + return true; + } + return false; + } + + static tristate + less (const linear_op &a, const linear_op &b) + { + /* Same base. 
*/ + if (a.m_base == b.m_base) + { + bit_offset_t a_wi_factor; + bit_offset_t b_wi_factor; + if (a.maybe_get_cst_factor (&a_wi_factor) + && b.maybe_get_cst_factor (&b_wi_factor)) + { + if (a_wi_factor != b_wi_factor) + return tristate (a_wi_factor < b_wi_factor); + else + { + bit_offset_t a_wi_offset; + bit_offset_t b_wi_offset; + if (a.maybe_get_cst_offset (&a_wi_offset) + && b.maybe_get_cst_offset (&b_wi_offset)) + return tristate (a_wi_offset < b_wi_offset); + } + } + } + return tristate::unknown (); + } + + static tristate + le (const linear_op &a, const linear_op &b) + { + /* Same base. */ + if (a.m_base == b.m_base) + { + bit_offset_t a_wi_factor; + bit_offset_t b_wi_factor; + if (a.maybe_get_cst_factor (&a_wi_factor) + && b.maybe_get_cst_factor (&b_wi_factor)) + { + if (a_wi_factor != b_wi_factor) + return tristate (a_wi_factor <= b_wi_factor); + else + { + bit_offset_t a_wi_offset; + bit_offset_t b_wi_offset; + if (a.maybe_get_cst_offset (&a_wi_offset) + && b.maybe_get_cst_offset (&b_wi_offset)) + return tristate (a_wi_offset <= b_wi_offset); + } + } + } + return tristate::unknown (); + } + + static bool + from_svalue (const svalue &sval, linear_op *out) + { + switch (sval.get_kind ()) + { + default: + break; + case SK_BINOP: + { + const binop_svalue &binop_sval ((const binop_svalue &)sval); + if (binop_sval.get_op () == MULT_EXPR) + { + *out = linear_op (binop_sval.get_arg0 (), + binop_sval.get_arg1 (), + NULL); + return true; + } + else if (binop_sval.get_op () == PLUS_EXPR) + { + if (binop_sval.get_arg0 ()->get_kind () == SK_BINOP) + { + const binop_svalue &inner_binop_sval + ((const binop_svalue &)*binop_sval.get_arg0 ()); + if (inner_binop_sval.get_op () == MULT_EXPR) + { + *out = linear_op (inner_binop_sval.get_arg0 (), + inner_binop_sval.get_arg1 (), + binop_sval.get_arg1 ()); + return true; + } + } + + *out = linear_op (binop_sval.get_arg0 (), + NULL, + binop_sval.get_arg1 ()); + return true; + } + } + break; + } + return false; + } + + const svalue 
*m_base; + const svalue *m_factor; + const svalue *m_offset; +}; + +bool +operator< (const region_offset &a, const region_offset &b) +{ + if (a.symbolic_p ()) + { + if (b.symbolic_p ()) + { + /* Symbolic vs symbolic. */ + const svalue &a_sval = *a.get_symbolic_byte_offset (); + const svalue &b_sval = *b.get_symbolic_byte_offset (); + + linear_op op_a (NULL, NULL, NULL); + linear_op op_b (NULL, NULL, NULL); + if (linear_op::from_svalue (a_sval, &op_a) + && linear_op::from_svalue (b_sval, &op_b)) + { + tristate ts = linear_op::less (op_a, op_b); + if (ts.is_true ()) + return true; + else if (ts.is_false ()) + return false; + } + /* Use svalue's deterministic order, for now. */ + return (svalue::cmp_ptr (a.get_symbolic_byte_offset (), + b.get_symbolic_byte_offset ()) + < 0); + } + else + /* Symbolic vs concrete: put all symbolic after all concrete. */ + return false; + } + else + { + if (b.symbolic_p ()) + /* Concrete vs symbolic: put all concrete before all symbolic. */ + return true; + else + /* Concrete vs concrete. */ + return a.get_bit_offset () < b.get_bit_offset (); + } +} + +bool +operator<= (const region_offset &a, const region_offset &b) +{ + if (a.symbolic_p ()) + { + if (b.symbolic_p ()) + { + /* Symbolic vs symbolic. */ + const svalue &a_sval = *a.get_symbolic_byte_offset (); + const svalue &b_sval = *b.get_symbolic_byte_offset (); + + linear_op op_a (NULL, NULL, NULL); + linear_op op_b (NULL, NULL, NULL); + if (linear_op::from_svalue (a_sval, &op_a) + && linear_op::from_svalue (b_sval, &op_b)) + { + tristate ts = linear_op::le (op_a, op_b); + if (ts.is_true ()) + return true; + else if (ts.is_false ()) + return false; + } + /* Use svalue's deterministic order, for now. */ + return (svalue::cmp_ptr (a.get_symbolic_byte_offset (), + b.get_symbolic_byte_offset ()) + <= 0); + } + else + /* Symbolic vs concrete: put all symbolic after all concrete. 
*/ + return false; + } + else + { + if (b.symbolic_p ()) + /* Concrete vs symbolic: put all concrete before all symbolic. */ + return true; + else + /* Concrete vs concrete. */ + return a.get_bit_offset () <= b.get_bit_offset (); + } +} + +bool +operator> (const region_offset &a, const region_offset &b) +{ + return b < a; +} + +bool +operator>= (const region_offset &a, const region_offset &b) +{ + return b <= a; +} + /* class region and its various subclasses. */ /* class region. */ @@ -339,6 +665,35 @@ region::get_offset (region_model_manager *mgr) const return *m_cached_offset; } +/* Get the region_offset for immediately beyond this region. */ + +region_offset +region::get_next_offset (region_model_manager *mgr) const +{ + region_offset start = get_offset (mgr); + + bit_size_t bit_size; + if (get_bit_size (&bit_size)) + { + if (start.concrete_p ()) + { + bit_offset_t next_bit_offset = start.get_bit_offset () + bit_size; + return region_offset::make_concrete (start.get_base_region (), + next_bit_offset); + } + } + + const svalue *start_byte_offset_sval = start.calc_symbolic_byte_offset (mgr); + const svalue *byte_size_sval = get_byte_size_sval (mgr); + const svalue *sum_sval + = mgr->get_or_create_binop (size_type_node, + PLUS_EXPR, + start_byte_offset_sval, + byte_size_sval); + return region_offset::make_symbolic (start.get_base_region (), + sum_sval); +} + /* Base class implementation of region::get_byte_size vfunc. If the size of this region (in bytes) is known statically, write it to *OUT and return true. 
@@ -617,7 +972,7 @@ region::get_relative_concrete_offset (bit_offset_t *) const const svalue * region::get_relative_symbolic_offset (region_model_manager *mgr) const { - return mgr->get_or_create_unknown_svalue (integer_type_node); + return mgr->get_or_create_unknown_svalue (ptrdiff_type_node); } /* Attempt to get the position and size of this region expressed as a @@ -1448,10 +1803,10 @@ field_region::get_relative_symbolic_offset (region_model_manager *mgr) const if (get_relative_concrete_offset (&out)) { tree cst_tree - = wide_int_to_tree (integer_type_node, out / BITS_PER_UNIT); + = wide_int_to_tree (ptrdiff_type_node, out / BITS_PER_UNIT); return mgr->get_or_create_constant_svalue (cst_tree); } - return mgr->get_or_create_unknown_svalue (integer_type_node); + return mgr->get_or_create_unknown_svalue (ptrdiff_type_node); } /* class element_region : public region. */ @@ -1533,14 +1888,14 @@ element_region::get_relative_symbolic_offset (region_model_manager *mgr) const HOST_WIDE_INT hwi_byte_size = int_size_in_bytes (elem_type); if (hwi_byte_size > 0) { - tree byte_size_tree = wide_int_to_tree (integer_type_node, + tree byte_size_tree = wide_int_to_tree (ptrdiff_type_node, hwi_byte_size); const svalue *byte_size_sval = mgr->get_or_create_constant_svalue (byte_size_tree); - return mgr->get_or_create_binop (integer_type_node, MULT_EXPR, + return mgr->get_or_create_binop (ptrdiff_type_node, MULT_EXPR, m_index, byte_size_sval); } - return mgr->get_or_create_unknown_svalue (integer_type_node); + return mgr->get_or_create_unknown_svalue (ptrdiff_type_node); } /* class offset_region : public region. 
*/ @@ -1864,7 +2219,7 @@ bit_range_region::get_relative_symbolic_offset (region_model_manager *mgr) const { byte_offset_t start_byte = m_bits.get_start_bit_offset () / BITS_PER_UNIT; - tree start_bit_tree = wide_int_to_tree (integer_type_node, start_byte); + tree start_bit_tree = wide_int_to_tree (ptrdiff_type_node, start_byte); return mgr->get_or_create_constant_svalue (start_bit_tree); } diff --git a/gcc/analyzer/region.h b/gcc/analyzer/region.h index 270e5042..0c79490 100644 --- a/gcc/analyzer/region.h +++ b/gcc/analyzer/region.h @@ -183,6 +183,7 @@ public: bool involves_p (const svalue *sval) const; region_offset get_offset (region_model_manager *mgr) const; + region_offset get_next_offset (region_model_manager *mgr) const; /* Attempt to get the size of this region as a concrete number of bytes. If successful, return true and write the size to *OUT. diff --git a/gcc/analyzer/sm-fd.cc b/gcc/analyzer/sm-fd.cc index d107390..03ad359 100644 --- a/gcc/analyzer/sm-fd.cc +++ b/gcc/analyzer/sm-fd.cc @@ -465,7 +465,7 @@ public: } bool - emit (rich_location *rich_loc) final override + emit (rich_location *rich_loc, logger *) final override { /*CWE-775: Missing Release of File Descriptor or Handle after Effective Lifetime @@ -550,7 +550,7 @@ public: } bool - emit (rich_location *rich_loc) final override + emit (rich_location *rich_loc, logger *) final override { bool warned; switch (m_fd_dir) @@ -612,7 +612,7 @@ public: return OPT_Wanalyzer_fd_double_close; } bool - emit (rich_location *rich_loc) final override + emit (rich_location *rich_loc, logger *) final override { diagnostic_metadata m; // CWE-1341: Multiple Releases of Same Resource or Handle @@ -677,7 +677,7 @@ public: } bool - emit (rich_location *rich_loc) final override + emit (rich_location *rich_loc, logger *) final override { bool warned; warned = warning_at (rich_loc, get_controlling_option (), @@ -748,7 +748,7 @@ public: } bool - emit (rich_location *rich_loc) final override + emit (rich_location 
*rich_loc, logger *) final override { bool warned; warned = warning_at (rich_loc, get_controlling_option (), @@ -859,7 +859,7 @@ public: } bool - emit (rich_location *rich_loc) final override + emit (rich_location *rich_loc, logger *) final override { /* CWE-666: Operation on Resource in Wrong Phase of Lifetime. */ diagnostic_metadata m; @@ -1019,7 +1019,7 @@ public: } bool - emit (rich_location *rich_loc) final override + emit (rich_location *rich_loc, logger *) final override { switch (m_expected_type) { diff --git a/gcc/analyzer/sm-file.cc b/gcc/analyzer/sm-file.cc index d99a09b..0cfe682 100644 --- a/gcc/analyzer/sm-file.cc +++ b/gcc/analyzer/sm-file.cc @@ -176,7 +176,7 @@ public: return OPT_Wanalyzer_double_fclose; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { diagnostic_metadata m; /* CWE-1341: Multiple Releases of Same Resource or Handle. */ @@ -224,7 +224,7 @@ public: return OPT_Wanalyzer_file_leak; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { diagnostic_metadata m; /* CWE-775: "Missing Release of File Descriptor or Handle after diff --git a/gcc/analyzer/sm-malloc.cc b/gcc/analyzer/sm-malloc.cc index 7470137..a8c63eb 100644 --- a/gcc/analyzer/sm-malloc.cc +++ b/gcc/analyzer/sm-malloc.cc @@ -835,7 +835,7 @@ public: return OPT_Wanalyzer_mismatching_deallocation; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { auto_diagnostic_group d; diagnostic_metadata m; @@ -914,7 +914,7 @@ public: return OPT_Wanalyzer_double_free; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { auto_diagnostic_group d; diagnostic_metadata m; @@ -1010,7 +1010,7 @@ public: return OPT_Wanalyzer_possible_null_dereference; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location 
*rich_loc, logger *) final override { /* CWE-690: Unchecked Return Value to NULL Pointer Dereference. */ diagnostic_metadata m; @@ -1099,7 +1099,7 @@ public: return OPT_Wanalyzer_possible_null_argument; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { /* CWE-690: Unchecked Return Value to NULL Pointer Dereference. */ auto_diagnostic_group d; @@ -1152,7 +1152,7 @@ public: bool terminate_path_p () const final override { return true; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { /* CWE-476: NULL Pointer Dereference. */ diagnostic_metadata m; @@ -1207,7 +1207,7 @@ public: bool terminate_path_p () const final override { return true; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { /* CWE-476: NULL Pointer Dereference. */ auto_diagnostic_group d; @@ -1264,7 +1264,7 @@ public: return OPT_Wanalyzer_use_after_free; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { /* CWE-416: Use After Free. */ diagnostic_metadata m; @@ -1358,7 +1358,7 @@ public: return OPT_Wanalyzer_malloc_leak; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { /* "CWE-401: Missing Release of Memory after Effective Lifetime". */ diagnostic_metadata m; @@ -1432,7 +1432,7 @@ public: return OPT_Wanalyzer_free_of_non_heap; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { auto_diagnostic_group d; diagnostic_metadata m; @@ -1511,7 +1511,7 @@ public: return OPT_Wanalyzer_deref_before_check; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { /* Don't emit the warning if we can't show where the deref and the check occur. 
*/ diff --git a/gcc/analyzer/sm-pattern-test.cc b/gcc/analyzer/sm-pattern-test.cc index 6c1c950..4c88bca 100644 --- a/gcc/analyzer/sm-pattern-test.cc +++ b/gcc/analyzer/sm-pattern-test.cc @@ -92,7 +92,7 @@ public: return 0; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { return warning_at (rich_loc, get_controlling_option (), "pattern match on %<%E %s %E%>", diff --git a/gcc/analyzer/sm-sensitive.cc b/gcc/analyzer/sm-sensitive.cc index d94d9e0..0597e39 100644 --- a/gcc/analyzer/sm-sensitive.cc +++ b/gcc/analyzer/sm-sensitive.cc @@ -95,7 +95,8 @@ public: return OPT_Wanalyzer_exposure_through_output_file; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, + logger *) final override { diagnostic_metadata m; /* CWE-532: Information Exposure Through Log Files */ diff --git a/gcc/analyzer/sm-signal.cc b/gcc/analyzer/sm-signal.cc index ac01f6a..e3f9092 100644 --- a/gcc/analyzer/sm-signal.cc +++ b/gcc/analyzer/sm-signal.cc @@ -114,7 +114,7 @@ public: return OPT_Wanalyzer_unsafe_call_within_signal_handler; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { auto_diagnostic_group d; diagnostic_metadata m; diff --git a/gcc/analyzer/sm-taint.cc b/gcc/analyzer/sm-taint.cc index f72f194..6d28d1f 100644 --- a/gcc/analyzer/sm-taint.cc +++ b/gcc/analyzer/sm-taint.cc @@ -211,7 +211,7 @@ public: return OPT_Wanalyzer_tainted_array_index; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { diagnostic_metadata m; /* CWE-129: "Improper Validation of Array Index". */ @@ -327,7 +327,7 @@ public: return OPT_Wanalyzer_tainted_offset; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { diagnostic_metadata m; /* CWE-823: "Use of Out-of-range Pointer Offset". 
*/ @@ -437,7 +437,7 @@ public: return OPT_Wanalyzer_tainted_size; } - bool emit (rich_location *rich_loc) override + bool emit (rich_location *rich_loc, logger *) override { /* "CWE-129: Improper Validation of Array Index". */ diagnostic_metadata m; @@ -547,9 +547,9 @@ public: return "tainted_access_attrib_size"; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *logger) final override { - bool warned = tainted_size::emit (rich_loc); + bool warned = tainted_size::emit (rich_loc, logger); if (warned) { inform (DECL_SOURCE_LOCATION (m_callee_fndecl), @@ -583,7 +583,7 @@ public: return OPT_Wanalyzer_tainted_divisor; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { diagnostic_metadata m; /* CWE-369: "Divide By Zero". */ @@ -645,7 +645,7 @@ public: return OPT_Wanalyzer_tainted_allocation_size; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { diagnostic_metadata m; /* "CWE-789: Memory Allocation with Excessive Size Value". */ @@ -800,7 +800,7 @@ public: return OPT_Wanalyzer_tainted_assertion; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { diagnostic_metadata m; /* "CWE-617: Reachable Assertion". */ diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc index 4d1de82..c7bc4b4 100644 --- a/gcc/analyzer/store.cc +++ b/gcc/analyzer/store.cc @@ -236,8 +236,8 @@ bit_range::dump () const pp_flush (&pp); } -/* If OTHER is a subset of this, return true and write - to *OUT the relative range of OTHER within this. +/* If OTHER is a subset of this, return true and, if OUT is + non-null, write to *OUT the relative range of OTHER within this. Otherwise return false. 
*/ bool @@ -246,8 +246,11 @@ bit_range::contains_p (const bit_range &other, bit_range *out) const if (contains_p (other.get_start_bit_offset ()) && contains_p (other.get_last_bit_offset ())) { - out->m_start_bit_offset = other.m_start_bit_offset - m_start_bit_offset; - out->m_size_in_bits = other.m_size_in_bits; + if (out) + { + out->m_start_bit_offset = other.m_start_bit_offset - m_start_bit_offset; + out->m_size_in_bits = other.m_size_in_bits; + } return true; } else diff --git a/gcc/analyzer/store.h b/gcc/analyzer/store.h index 7ded650..af6cc7e 100644 --- a/gcc/analyzer/store.h +++ b/gcc/analyzer/store.h @@ -350,6 +350,15 @@ struct byte_range m_size_in_bytes * BITS_PER_UNIT); } + bit_offset_t get_start_bit_offset () const + { + return m_start_byte_offset * BITS_PER_UNIT; + } + bit_offset_t get_next_bit_offset () const + { + return get_next_byte_offset () * BITS_PER_UNIT; + } + static int cmp (const byte_range &br1, const byte_range &br2); byte_offset_t m_start_byte_offset; diff --git a/gcc/analyzer/varargs.cc b/gcc/analyzer/varargs.cc index aeea73a..72e1b31 100644 --- a/gcc/analyzer/varargs.cc +++ b/gcc/analyzer/varargs.cc @@ -403,7 +403,7 @@ public: && 0 == strcmp (m_usage_fnname, other.m_usage_fnname)); } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { auto_diagnostic_group d; return warning_at (rich_loc, get_controlling_option (), @@ -478,7 +478,7 @@ public: return va_list_sm_diagnostic::subclass_equal_p (other); } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { auto_diagnostic_group d; return warning_at (rich_loc, get_controlling_option (), @@ -892,7 +892,7 @@ public: return OPT_Wanalyzer_va_arg_type_mismatch; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { auto_diagnostic_group d; diagnostic_metadata m; @@ -942,7 +942,7 @@ public: return 
OPT_Wanalyzer_va_list_exhausted; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { auto_diagnostic_group d; diagnostic_metadata m; diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 7f76337..fd4943b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -10612,6 +10612,15 @@ offset as well as the capacity is symbolic. See @uref{https://cwe.mitre.org/data/definitions/119.html, CWE-119: Improper Restriction of Operations within the Bounds of a Memory Buffer}. +For cases where the analyzer is able, it will emit a text art diagram +visualizing the spatial relationship between the memory region that the +analyzer predicts would be accessed, versus the range of memory that is +valid to access: whether they overlap, are touching, are close or far +apart; which one is before or after in memory, the relative sizes +involved, the direction of the access (read vs write), and, in some +cases, the values of data involved. This diagram can be suppressed +using @option{-fdiagnostics-text-art-charset=none}. + @opindex Wanalyzer-possible-null-argument @opindex Wno-analyzer-possible-null-argument @item -Wno-analyzer-possible-null-argument @@ -11122,6 +11131,12 @@ following warnings from @option{-fanalyzer}: -Wanalyzer-va-list-use-after-va-end } +@opindex fanalyzer-debug-text-art +@opindex fno-analyzer-debug-text-art +@item -fanalyzer-debug-text-art +This option is intended for analyzer developers. If enabled, +the analyzer will add extra annotations to any diagrams it generates. 
+ @opindex fanalyzer-feasibility @opindex fno-analyzer-feasibility @item -fno-analyzer-feasibility diff --git a/gcc/testsuite/gcc.dg/analyzer/data-model-1.c b/gcc/testsuite/gcc.dg/analyzer/data-model-1.c index 86d1ccf..3c4a45f 100644 --- a/gcc/testsuite/gcc.dg/analyzer/data-model-1.c +++ b/gcc/testsuite/gcc.dg/analyzer/data-model-1.c @@ -240,8 +240,8 @@ void test_16 (void) __analyzer_eval (strlen (msg) == 11); /* { dg-warning "TRUE" } */ /* Out-of-bounds. */ - __analyzer_eval (msg[100] == 'e'); /* { dg-warning "UNKNOWN" } */ - // TODO: some kind of warning for the out-of-bounds access + __analyzer_eval (msg[100] == 'e'); /* { dg-warning "UNKNOWN" "eval result" } */ + /* { dg-warning "buffer over-read" "out-of-bounds" { target *-*-* } .-1 } */ } static const char *__attribute__((noinline)) diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-ascii.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-ascii.c new file mode 100644 index 0000000..5e6eadc --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-ascii.c @@ -0,0 +1,55 @@ +/* { dg-additional-options "-fdiagnostics-show-path-depths" } */ +/* { dg-additional-options "-fdiagnostics-path-format=inline-events -fdiagnostics-show-caret" } */ +/* { dg-additional-options "-fdiagnostics-text-art-charset=ascii" } */ + +#include <stdint.h> + +int32_t arr[10]; + +void int_arr_write_element_after_end_off_by_one(int32_t x) +{ + arr[10] = x; /* { dg-line line } */ +} +/* { dg-warning "buffer overflow" "warning" { target *-*-* } line } */ +/* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } line } */ + + +/* { dg-begin-multiline-output "" } + arr[10] = x; + ~~~~~~~~^~~ + event 1 (depth 0) + | + | int32_t arr[10]; + | ^~~ + | | + | (1) capacity: 40 bytes + | + +--> 'int_arr_write_element_after_end_off_by_one': event 2 (depth 1) + | + | arr[10] = x; + | ~~~~~~~~^~~ + | | + | (2) out-of-bounds write from byte 40 till byte 43 but 
'arr' ends at byte 40 + | + { dg-end-multiline-output "" } */ + +/* { dg-begin-multiline-output "" } + + +--------------------------------+ + |write from 'x' (type: 'int32_t')| + +--------------------------------+ + | + | + v + +--------+-----------------+---------++--------------------------------+ + | [0] | ... | [9] || | + +--------+-----------------+---------+| after valid range | + | 'arr' (type: 'int32_t[10]') || | + +------------------------------------++--------------------------------+ + |~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~||~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~| + | | + +---------+--------+ +---------+---------+ + |capacity: 40 bytes| |overflow of 4 bytes| + +------------------+ +-------------------+ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-debug.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-debug.c new file mode 100644 index 0000000..4c4d9d1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-debug.c @@ -0,0 +1,40 @@ +/* Test of -fanalyzer-debug-text-art. 
*/ + +/* { dg-additional-options "-fdiagnostics-text-art-charset=ascii -fanalyzer-debug-text-art" } */ + +#include <stdint.h> + +int32_t arr[10]; + +void int_arr_write_element_after_end_off_by_one(int32_t x) +{ + arr[10] = x; /* { dg-line line } */ +} +/* { dg-warning "buffer overflow" "warning" { target *-*-* } line } */ +/* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } line } */ + +/* { dg-begin-multiline-output "" } + + +---------+-----------+-----------+---+--------------------------------+ + | tc0 | tc1 | tc2 |tc3| tc4 | + +---------+-----------+-----------+---+--------------------------------+ + |bytes 0-3|bytes 4-35 |bytes 36-39| | bytes 40-43 | + +---------+-----------+-----------+ +--------------------------------+ + +--------------------------------+ + |write from 'x' (type: 'int32_t')| + +--------------------------------+ + | + | + v + +---------+-----------+-----------+ +--------------------------------+ + | [0] | ... 
| [9] | | | + +---------+-----------+-----------+ | after valid range | + | 'arr' (type: 'int32_t[10]') | | | + +---------------------------------+ +--------------------------------+ + |~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~| |~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~| + | | + +---------+--------+ +---------+---------+ + |capacity: 40 bytes| |overflow of 4 bytes| + +------------------+ +-------------------+ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-emoji.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-emoji.c new file mode 100644 index 0000000..1c61252 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-emoji.c @@ -0,0 +1,55 @@ +/* { dg-additional-options "-fdiagnostics-show-path-depths" } */ +/* { dg-additional-options "-fdiagnostics-path-format=inline-events -fdiagnostics-show-caret" } */ +/* { dg-additional-options "-fdiagnostics-text-art-charset=emoji" } */ + +#include <stdint.h> + +int32_t arr[10]; + +void int_arr_write_element_after_end_off_by_one(int32_t x) +{ + arr[10] = x; /* { dg-line line } */ +} +/* { dg-warning "buffer overflow" "warning" { target *-*-* } line } */ +/* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } line } */ + + +/* { dg-begin-multiline-output "" } + arr[10] = x; + ~~~~~~~~^~~ + event 1 (depth 0) + | + | int32_t arr[10]; + | ^~~ + | | + | (1) capacity: 40 bytes + | + +--> 'int_arr_write_element_after_end_off_by_one': event 2 (depth 1) + | + | arr[10] = x; + | ~~~~~~~~^~~ + | | + | (2) out-of-bounds write from byte 40 till byte 43 but 'arr' ends at byte 40 + | + { dg-end-multiline-output "" } */ + +/* { dg-begin-multiline-output "" } + + ┌────────────────────────────────┐ + │write from 'x' (type: 'int32_t')│ + └────────────────────────────────┘ + │ + │ + v + ┌────────┬─────────────────┬─────────┐┌────────────────────────────────┐ + │ [0] │ ... 
│ [9] ││ │ + ├────────┴─────────────────┴─────────┤│ after valid range │ + │ 'arr' (type: 'int32_t[10]') ││ │ + └────────────────────────────────────┘└────────────────────────────────┘ + ├─────────────────┬──────────────────┤├───────────────┬────────────────┤ + │ │ + ╭─────────┴────────╮ ╭───────────┴──────────╮ + │capacity: 40 bytes│ │⚠️ overflow of 4 bytes│ + ╰──────────────────╯ ╰──────────────────────╯ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-json.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-json.c new file mode 100644 index 0000000..0a2cc34 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-json.c @@ -0,0 +1,13 @@ +/* { dg-additional-options "-fdiagnostics-format=json-file" } */ + +/* The custom JSON format doesn't support text art, so this is just a simple + smoke test. */ + +#include <stdint.h> + +int32_t arr[10]; + +void int_arr_write_element_after_end_off_by_one(int32_t x) +{ + arr[10] = x; +} diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-sarif.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-sarif.c new file mode 100644 index 0000000..051a1ce --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-sarif.c @@ -0,0 +1,24 @@ +/* We require -fdiagnostics-text-art-charset= to get any text art here + because of the test suite using -fdiagnostics-plain-output. */ + +/* { dg-additional-options "-fdiagnostics-format=sarif-file -fdiagnostics-text-art-charset=ascii" } */ + +#include <stdint.h> + +int32_t arr[10]; + +void int_arr_write_element_after_end_off_by_one(int32_t x) +{ + arr[10] = x; +} + +/* Verify that some JSON was written to a file with the expected name. + + { dg-final { verify-sarif-file } } + + Expect the "alt-text" to be captured. 
+ { dg-final { scan-sarif-file "\"text\": \"Diagram visualizing the predicted out-of-bounds access\"," } } + + Expect the diagram to have 4 leading spaces (to indicate a code block), + and that at least part of the diagram was written out. + { dg-final { scan-sarif-file "\"markdown\": \" .*capacity: 40 bytes.*\"" } } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-unicode.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-unicode.c new file mode 100644 index 0000000..71f66ff --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-1-unicode.c @@ -0,0 +1,55 @@ +/* { dg-additional-options "-fdiagnostics-show-path-depths" } */ +/* { dg-additional-options "-fdiagnostics-path-format=inline-events -fdiagnostics-show-caret" } */ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */ + +#include <stdint.h> + +int32_t arr[10]; + +void int_arr_write_element_after_end_off_by_one(int32_t x) +{ + arr[10] = x; /* { dg-line line } */ +} +/* { dg-warning "buffer overflow" "warning" { target *-*-* } line } */ +/* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } line } */ + + +/* { dg-begin-multiline-output "" } + arr[10] = x; + ~~~~~~~~^~~ + event 1 (depth 0) + | + | int32_t arr[10]; + | ^~~ + | | + | (1) capacity: 40 bytes + | + +--> 'int_arr_write_element_after_end_off_by_one': event 2 (depth 1) + | + | arr[10] = x; + | ~~~~~~~~^~~ + | | + | (2) out-of-bounds write from byte 40 till byte 43 but 'arr' ends at byte 40 + | + { dg-end-multiline-output "" } */ + +/* { dg-begin-multiline-output "" } + + ┌────────────────────────────────┐ + │write from 'x' (type: 'int32_t')│ + └────────────────────────────────┘ + │ + │ + v + ┌────────┬─────────────────┬─────────┐┌────────────────────────────────┐ + │ [0] │ ... 
│ [9] ││ │ + ├────────┴─────────────────┴─────────┤│ after valid range │ + │ 'arr' (type: 'int32_t[10]') ││ │ + └────────────────────────────────────┘└────────────────────────────────┘ + ├─────────────────┬──────────────────┤├───────────────┬────────────────┤ + │ │ + ╭─────────┴────────╮ ╭─────────┴─────────╮ + │capacity: 40 bytes│ │overflow of 4 bytes│ + ╰──────────────────╯ ╰───────────────────╯ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-10.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-10.c new file mode 100644 index 0000000..4a7b8e3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-10.c @@ -0,0 +1,29 @@ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */ + +#include <stdint.h> +#include <stdlib.h> + +int32_t int_vla_write_element_symbolic_before_start (int32_t x, size_t n) +{ + int32_t arr[n]; /* { dg-message "\\(1\\) capacity: 'n \\* 4' bytes" } */ + arr[-2] = 42; /* { dg-warning "stack-based buffer underwrite" } */ +} + +/* { dg-begin-multiline-output "" } + + ┌───────────────────┐ + │write of '(int) 42'│ + └───────────────────┘ + │ + │ + v + ┌───────────────────┐ ┌────────────────────────────────┐ + │before valid range │ │buffer allocated on stack at (1)│ + └───────────────────┘ └────────────────────────────────┘ + ├─────────┬─────────┤├───────┬───────┤├───────────────┬────────────────┤ + │ │ │ + ╭─────────┴───────────╮ ╭───┴───╮ ╭───────────┴───────────╮ + │underwrite of 4 bytes│ │4 bytes│ │capacity: 'n * 4' bytes│ + ╰─────────────────────╯ ╰───────╯ ╰───────────────────────╯ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-11.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-11.c new file mode 100644 index 0000000..f8eb158 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-11.c @@ -0,0 +1,82 @@ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } 
*/ +/* { dg-require-effective-target alloca } */ + +#include <stdlib.h> +#include <stdint.h> +#include <string.h> + +void test6 (size_t size) +{ + int32_t *buf = __builtin_alloca (4 * size); + memset (buf, 0, 4 * size); + int32_t last = *(buf + 4 * size); /* { dg-warning "stack-based buffer over-read" } */ +} + +/* (size * 16) - (size * 4) leads to a gap of (size * 12). */ + +/* { dg-begin-multiline-output "" } + + ┌─────────────────┐ + │ read of 4 bytes │ + └─────────────────┘ + ^ + │ + │ + ┌────────────────────────────────┐ ┌─────────────────┐ + │buffer allocated on stack at (1)│ │after valid range│ + └────────────────────────────────┘ └─────────────────┘ + ├───────────────┬────────────────┤├────────┬────────┤├────────┬────────┤ + │ │ │ + │ │ ╭──────────┴─────────╮ + │ │ │over-read of 4 bytes│ + │ │ ╰────────────────────╯ + ╭───────────┴──────────╮ ╭────────┴────────╮ + │size: 'size * 4' bytes│ │'size * 12' bytes│ + ╰──────────────────────╯ ╰─────────────────╯ + + { dg-end-multiline-output "" } */ + +void test7 (size_t size) +{ + int32_t *buf = __builtin_alloca (4 * size + 3); /* { dg-warning "allocated buffer size is not a multiple of the pointee's size" } */ + buf[size] = 42; /* { dg-warning "stack-based buffer overflow" } */ +} + +/* { dg-begin-multiline-output "" } + + ┌───────────────────────────────────────┐ + │ write of '(int) 42' │ + └───────────────────────────────────────┘ + │ │ + │ │ + v v + ┌──────────────────────────────────────────────────┐┌──────────────────┐ + │ buffer allocated on stack at (1) ││after valid range │ + └──────────────────────────────────────────────────┘└──────────────────┘ + ├────────────────────────┬─────────────────────────┤├────────┬─────────┤ + │ │ + ╭───────────────┴──────────────╮ ╭─────────┴────────╮ + │capacity: 'size * 4 + 3' bytes│ │overflow of 1 byte│ + ╰──────────────────────────────╯ ╰──────────────────╯ + + { dg-end-multiline-output "" } */ + + +/* We're currently not able to generate a diagram for this case; + make 
sure we handle this case gracefully. */ + +char *test99 (const char *x, const char *y) +{ + size_t len_x = __builtin_strlen (x); + size_t len_y = __builtin_strlen (y); + /* BUG (root cause): forgot to add 1 for terminator. */ + size_t sz = len_x + len_y; + char *result = __builtin_malloc (sz); + if (!result) + return NULL; + __builtin_memcpy (result, x, len_x); + __builtin_memcpy (result + len_x, y, len_y); + /* BUG (symptom): off-by-one out-of-bounds write to heap. */ + result[len_x + len_y] = '\0'; /* { dg-warning "heap-based buffer overflow" } */ + return result; +} diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-12.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-12.c new file mode 100644 index 0000000..3573750 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-12.c @@ -0,0 +1,53 @@ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */ +/* { dg-require-effective-target alloca } */ + +#include <string.h> +#include <stdlib.h> +#include <stdint.h> + +void test8 (size_t size, size_t offset) +{ + char src[size]; + char dst[size]; + memcpy (dst, src, size + offset); /* { dg-line test8 } */ + /* { dg-warning "over-read" "warning" { target *-*-* } test8 } */ + /* { dg-warning "overflow" "warning" { target *-*-* } test8 } */ +} + +/* { dg-begin-multiline-output "" } + + ┌──────────────────────────────────────────────────────────────────────┐ + │ read of 'size + offset' bytes │ + └──────────────────────────────────────────────────────────────────────┘ + ^ ^ + │ │ + │ │ + ┌──────────────────────────────────┐┌──────────────────────────────────┐ + │ buffer allocated on stack at (1) ││ after valid range │ + └──────────────────────────────────┘└──────────────────────────────────┘ + ├────────────────┬─────────────────┤├────────────────┬─────────────────┤ + │ │ + ╭─────────┴────────╮ ╭─────────────┴─────────────╮ + │size: 'size' bytes│ │over-read of 'offset' bytes│ + ╰──────────────────╯ 
╰───────────────────────────╯ + + { dg-end-multiline-output "" } */ + +/* { dg-begin-multiline-output "" } + + ┌──────────────────────────────────────────────────────────────────────┐ + │ write of 'size + offset' bytes │ + └──────────────────────────────────────────────────────────────────────┘ + │ │ + │ │ + v v + ┌──────────────────────────────────┐┌──────────────────────────────────┐ + │ buffer allocated on stack at (1) ││ after valid range │ + └──────────────────────────────────┘└──────────────────────────────────┘ + ├────────────────┬─────────────────┤├────────────────┬─────────────────┤ + │ │ + ╭───────────┴──────────╮ ╭─────────────┴────────────╮ + │capacity: 'size' bytes│ │overflow of 'offset' bytes│ + ╰──────────────────────╯ ╰──────────────────────────╯ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-13.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-13.c new file mode 100644 index 0000000..dcd1263 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-13.c @@ -0,0 +1,43 @@ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */ + +#include <string.h> + +void +test_non_ascii () +{ + char buf[9]; + strcpy (buf, "Liberté\n"); /* { dg-warning "stack-based buffer overflow" } */ + /* { dg-warning "'__builtin_memcpy' writing 10 bytes into a region of size 9 overflows the destination" "" { target *-*-* } .-1 } */ +} + +/* Example of non-ASCII UTF-8 that's short enough to fully quote, whilst + containing control characters. 
*/ + +/* { dg-begin-multiline-output "" } + + ┌──────┬──────┬──────┬──────┬──────┬──────┬────┬────┬──────┐┌─────────────────┐ + │ [0] │ [1] │ [2] │ [3] │ [4] │ [5] │[6] │[7] │ [8] ││ [9] │ + ├──────┼──────┼──────┼──────┼──────┼──────┼────┼────┼──────┤├─────────────────┤ + │ 0x4c │ 0x69 │ 0x62 │ 0x65 │ 0x72 │ 0x74 │0xc3│0xa9│ 0x0a ││ 0x00 │ + ├──────┼──────┼──────┼──────┼──────┼──────┼────┴────┼──────┤├─────────────────┤ + │U+004c│U+0069│U+0062│U+0065│U+0072│U+0074│ U+00e9 │U+000a││ U+0000 │ + ├──────┼──────┼──────┼──────┼──────┼──────┼─────────┼──────┤├─────────────────┤ + │ L │ i │ b │ e │ r │ t │ é │ ││ NUL │ + ├──────┴──────┴──────┴──────┴──────┴──────┴─────────┴──────┴┴─────────────────┤ + │ string literal (type: 'char[10]') │ + └─────────────────────────────────────────────────────────────────────────────┘ + │ │ │ │ │ │ │ │ │ │ + │ │ │ │ │ │ │ │ │ │ + v v v v v v v v v v + ┌──────┬────────────────────────────────────────────┬──────┐┌─────────────────┐ + │ [0] │ ... │ [8] ││ │ + ├──────┴────────────────────────────────────────────┴──────┤│after valid range│ + │ 'buf' (type: 'char[9]') ││ │ + └──────────────────────────────────────────────────────────┘└─────────────────┘ + ├────────────────────────────┬─────────────────────────────┤├────────┬────────┤ + │ │ + ╭────────┴────────╮ ╭─────────┴────────╮ + │capacity: 9 bytes│ │overflow of 1 byte│ + ╰─────────────────╯ ╰──────────────────╯ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-14.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-14.c new file mode 100644 index 0000000..3cedf06 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-14.c @@ -0,0 +1,110 @@ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */ + +#include <stdint.h> + +extern int32_t arr_0[0]; /* { dg-message "capacity: 0 bytes" } */ +extern int32_t arr_1[1]; /* { dg-message "capacity: 4 bytes" } */ +extern int32_t arr_2[2]; /* { dg-message 
"capacity: 8 bytes" } */ +extern int32_t arr_3[3]; /* { dg-message "capacity: 12 bytes" } */ +extern int32_t arr_4[4]; /* { dg-message "capacity: 16 bytes" } */ + +void test_immediately_after (int x) +{ + arr_0[0] = x; /* { dg-warning "buffer overflow" } */ + arr_1[1] = x; /* { dg-warning "buffer overflow" } */ + arr_2[2] = x; /* { dg-warning "buffer overflow" } */ + arr_3[3] = x; /* { dg-warning "buffer overflow" } */ + arr_4[4] = x; /* { dg-warning "buffer overflow" } */ +} + +/* Expect no diagram for the arr_0 case: there's no valid region +to write to. */ + +/* The arr_1 case. */ +/* { dg-begin-multiline-output "" } + + ┌──────────────────────────────────┐ + │ write from 'x' (type: 'int') │ + └──────────────────────────────────┘ + │ + │ + v + ┌──────────────────────────────────┐┌──────────────────────────────────┐ + │ [0] ││ │ + ├──────────────────────────────────┤│ after valid range │ + │ 'arr_1' (type: 'int32_t[1]') ││ │ + └──────────────────────────────────┘└──────────────────────────────────┘ + ├────────────────┬─────────────────┤├────────────────┬─────────────────┤ + │ │ + ╭────────┴────────╮ ╭─────────┴─────────╮ + │capacity: 4 bytes│ │overflow of 4 bytes│ + ╰─────────────────╯ ╰───────────────────╯ + + { dg-end-multiline-output "" } */ + +/* The arr_2 case. */ +/* { dg-begin-multiline-output "" } + + ┌────────────────────────────┐ + │write from 'x' (type: 'int')│ + └────────────────────────────┘ + │ + │ + v + ┌────────────────────┬───────────────────┐┌────────────────────────────┐ + │ [0] │ [1] ││ │ + ├────────────────────┴───────────────────┤│ after valid range │ + │ 'arr_2' (type: 'int32_t[2]') ││ │ + └────────────────────────────────────────┘└────────────────────────────┘ + ├───────────────────┬────────────────────┤├─────────────┬──────────────┤ + │ │ + ╭────────┴────────╮ ╭─────────┴─────────╮ + │capacity: 8 bytes│ │overflow of 4 bytes│ + ╰─────────────────╯ ╰───────────────────╯ + + { dg-end-multiline-output "" } */ + +/* The arr_3 case. 
*/ +// Perhaps we should show [1] rather than ellipsize here. +/* { dg-begin-multiline-output "" } + + ┌────────────────────────────┐ + │write from 'x' (type: 'int')│ + └────────────────────────────┘ + │ + │ + v + ┌─────────────┬─────────────┬────────────┐┌────────────────────────────┐ + │ [0] │ ... │ [2] ││ │ + ├─────────────┴─────────────┴────────────┤│ after valid range │ + │ 'arr_3' (type: 'int32_t[3]') ││ │ + └────────────────────────────────────────┘└────────────────────────────┘ + ├───────────────────┬────────────────────┤├─────────────┬──────────────┤ + │ │ + ╭─────────┴────────╮ ╭─────────┴─────────╮ + │capacity: 12 bytes│ │overflow of 4 bytes│ + ╰──────────────────╯ ╰───────────────────╯ + + { dg-end-multiline-output "" } */ + +/* The arr_4 case. */ +/* { dg-begin-multiline-output "" } + + ┌────────────────────────────┐ + │write from 'x' (type: 'int')│ + └────────────────────────────┘ + │ + │ + v + ┌──────────┬──────────────────┬──────────┐┌────────────────────────────┐ + │ [0] │ ... │ [3] ││ │ + ├──────────┴──────────────────┴──────────┤│ after valid range │ + │ 'arr_4' (type: 'int32_t[4]') ││ │ + └────────────────────────────────────────┘└────────────────────────────┘ + ├───────────────────┬────────────────────┤├─────────────┬──────────────┤ + │ │ + ╭─────────┴────────╮ ╭─────────┴─────────╮ + │capacity: 16 bytes│ │overflow of 4 bytes│ + ╰──────────────────╯ ╰───────────────────╯ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-15.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-15.c new file mode 100644 index 0000000..e2a6381 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-15.c @@ -0,0 +1,42 @@ +/* Regression test for ICE with short values of + --param=analyzer-text-art-string-ellipsis-threshold=. 
*/ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode --param=analyzer-text-art-string-ellipsis-threshold=0" } */ + +#include <string.h> + +void +test_non_ascii () +{ + char buf[9]; + strcpy (buf, "Liberté\n"); /* { dg-warning "stack-based buffer overflow" } */ + /* { dg-warning "'__builtin_memcpy' writing 10 bytes into a region of size 9 overflows the destination" "" { target *-*-* } .-1 } */ +} + +/* { dg-begin-multiline-output "" } + + ┌──────┬──────┬──────┬──────┬──────┬──────┬────┬────┬──────┐┌─────────────────┐ + │ [0] │ [1] │ [2] │ [3] │ [4] │ [5] │[6] │[7] │ [8] ││ [9] │ + ├──────┼──────┼──────┼──────┼──────┼──────┼────┼────┼──────┤├─────────────────┤ + │ 0x4c │ 0x69 │ 0x62 │ 0x65 │ 0x72 │ 0x74 │0xc3│0xa9│ 0x0a ││ 0x00 │ + ├──────┼──────┼──────┼──────┼──────┼──────┼────┴────┼──────┤├─────────────────┤ + │U+004c│U+0069│U+0062│U+0065│U+0072│U+0074│ U+00e9 │U+000a││ U+0000 │ + ├──────┼──────┼──────┼──────┼──────┼──────┼─────────┼──────┤├─────────────────┤ + │ L │ i │ b │ e │ r │ t │ é │ ││ NUL │ + ├──────┴──────┴──────┴──────┴──────┴──────┴─────────┴──────┴┴─────────────────┤ + │ string literal (type: 'char[10]') │ + └─────────────────────────────────────────────────────────────────────────────┘ + │ │ │ │ │ │ │ │ │ │ + │ │ │ │ │ │ │ │ │ │ + v v v v v v v v v v + ┌──────┬────────────────────────────────────────────┬──────┐┌─────────────────┐ + │ [0] │ ... 
│ [8] ││ │ + ├──────┴────────────────────────────────────────────┴──────┤│after valid range│ + │ 'buf' (type: 'char[9]') ││ │ + └──────────────────────────────────────────────────────────┘└─────────────────┘ + ├────────────────────────────┬─────────────────────────────┤├────────┬────────┤ + │ │ + ╭────────┴────────╮ ╭─────────┴────────╮ + │capacity: 9 bytes│ │overflow of 1 byte│ + ╰─────────────────╯ ╰──────────────────╯ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-2.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-2.c new file mode 100644 index 0000000..535dab1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-2.c @@ -0,0 +1,30 @@ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */ + +#include <stdint.h> +#include <stdlib.h> + +void int_vla_write_element_after_end_off_by_one(int32_t x, size_t n) +{ + int32_t arr[n]; /* { dg-message "\\(1\\) capacity: 'n \\* 4' bytes" } */ + + arr[n] = x; /* { dg-warning "stack-based buffer overflow" } */ +} + +/* { dg-begin-multiline-output "" } + + ┌──────────────────────────────────┐ + │ write from 'x' (type: 'int32_t') │ + └──────────────────────────────────┘ + │ + │ + v + ┌──────────────────────────────────┐┌──────────────────────────────────┐ + │ buffer allocated on stack at (1) ││ after valid range │ + └──────────────────────────────────┘└──────────────────────────────────┘ + ├────────────────┬─────────────────┤├────────────────┬─────────────────┤ + │ │ + ╭───────────┴───────────╮ ╭─────────┴─────────╮ + │capacity: 'n * 4' bytes│ │overflow of 4 bytes│ + ╰───────────────────────╯ ╰───────────────────╯ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-3.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-3.c new file mode 100644 index 0000000..064f3fa --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-3.c @@ -0,0 +1,45 @@ +/* The multiline 
output assumes sizeof(size_t) == 8. + { dg-require-effective-target lp64 } */ + +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */ + +#include <stdlib.h> +#include <string.h> +#include <stdint.h> + +struct str { + size_t len; + char data[]; +}; + +struct str * +make_str_badly (const char *src) +{ + size_t len = strlen(src); + struct str *str = malloc(sizeof(str) + len); /* { dg-message "\\(1\\) capacity: 'len \\+ 8' bytes" } */ + if (!str) + return NULL; + str->len = len; + memcpy(str->data, src, len); + str->data[len] = '\0'; /* { dg-warning "heap-based buffer overflow" } */ + return str; +} + +/* { dg-begin-multiline-output "" } + + ┌──────────────────────────────────┐ + │ write of '(char) 0' │ + └──────────────────────────────────┘ + │ + │ + v + ┌──────────────────────────────────┐┌──────────────────────────────────┐ + │ buffer allocated on heap at (1) ││ after valid range │ + └──────────────────────────────────┘└──────────────────────────────────┘ + ├────────────────┬─────────────────┤├────────────────┬─────────────────┤ + │ │ + ╭────────────┴────────────╮ ╭─────────┴────────╮ + │capacity: 'len + 8' bytes│ │overflow of 1 byte│ + ╰─────────────────────────╯ ╰──────────────────╯ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-4.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-4.c new file mode 100644 index 0000000..ec8e4ab --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-4.c @@ -0,0 +1,45 @@ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */ + +#include <string.h> + +#define LOREM_IPSUM \ + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod" \ + " tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim" \ + " veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea" \ + " commodo consequat. 
Duis aute irure dolor in reprehenderit in voluptate" \ + " velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint" \ + " occaecat cupidatat non proident, sunt in culpa qui officia deserunt" \ + " mollit anim id est laborum." + +void +test_long_string () +{ + char buf[100]; + strcpy (buf, LOREM_IPSUM); /* { dg-warning "stack-based buffer overflow" } */ + /* { dg-warning "'__builtin_memcpy' writing 446 bytes into a region of size 100 overflows the destination" "" { target *-*-* } .-1 } */ +} + +/* { dg-begin-multiline-output "" } + + ┌───┬───┬───┬───┬───┬───┬──────────┬─────┬─────┬─────┬─────┬─────┬─────┐ + │[0]│[1]│[2]│[3]│[4]│[5]│ │[440]│[441]│[442]│[443]│[444]│[445]│ + ├───┼───┼───┼───┼───┼───┤ ... ├─────┼─────┼─────┼─────┼─────┼─────┤ + │'L'│'o'│'r'│'e'│'m'│' '│ │ 'o' │ 'r' │ 'u' │ 'm' │ '.' │ NUL │ + ├───┴───┴───┴───┴───┴───┴──────────┴─────┴─────┴─────┴─────┴─────┴─────┤ + │ string literal (type: 'char[446]') │ + └──────────────────────────────────────────────────────────────────────┘ + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ + v v v v v v v v v v v v v v v + ┌───┬─────────────────────┬────┐┌──────────────────────────────────────┐ + │[0]│ ... 
│[99]││ │ + ├───┴─────────────────────┴────┤│ after valid range │ + │ 'buf' (type: 'char[100]') ││ │ + └──────────────────────────────┘└──────────────────────────────────────┘ + ├──────────────┬───────────────┤├──────────────────┬───────────────────┤ + │ │ + ╭─────────┴─────────╮ ╭──────────┴──────────╮ + │capacity: 100 bytes│ │overflow of 346 bytes│ + ╰───────────────────╯ ╰─────────────────────╯ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-5-ascii.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-5-ascii.c new file mode 100644 index 0000000..e82bce9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-5-ascii.c @@ -0,0 +1,40 @@ +/* { dg-additional-options "-fdiagnostics-text-art-charset=ascii" } */ + +#include <string.h> + +void +test_non_ascii () +{ + char buf[5]; + strcpy (buf, "文字化け"); /* { dg-warning "stack-based buffer overflow" } */ + /* { dg-warning "'__builtin_memcpy' writing 13 bytes into a region of size 5 overflows the destination" "" { target *-*-* } .-1 } */ +} + +/* Without unicode support, we shouldn't show the printable unicode chars. */ + +/* { dg-begin-multiline-output "" } + + +-----+-----+-----+----+----++----+----+----+----+----+----+----+------+ + | [0] | [1] | [2] |[3] |[4] ||[5] |[6] |[7] |[8] |[9] |[10]|[11]| [12] | + +-----+-----+-----+----+----++----+----+----+----+----+----+----+------+ + |0xe6 |0x96 |0x87 |0xe5|0xad||0x97|0xe5|0x8c|0x96|0xe3|0x81|0x91| 0x00 | + +-----+-----+-----+----+----++----+----+----+----+----+----+----+------+ + | U+6587 | U+5b57 | U+5316 | U+3051 |U+0000| + +-----------------+---------------+--------------+--------------+------+ + | string literal (type: 'char[13]') | + +----------------------------------------------------------------------+ + | | | | | | | | | | | | | + | | | | | | | | | | | | | + v v v v v v v v v v v v v + +-----+----------------+----++-----------------------------------------+ + | [0] | ... 
|[4] || | + +-----+----------------+----+| after valid range | + | 'buf' (type: 'char[5]') || | + +---------------------------++-----------------------------------------+ + |~~~~~~~~~~~~~+~~~~~~~~~~~~~||~~~~~~~~~~~~~~~~~~~~+~~~~~~~~~~~~~~~~~~~~| + | | + +--------+--------+ +---------+---------+ + |capacity: 5 bytes| |overflow of 8 bytes| + +-----------------+ +-------------------+ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-5-unicode.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-5-unicode.c new file mode 100644 index 0000000..48fa12f --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-5-unicode.c @@ -0,0 +1,42 @@ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */ + +#include <string.h> + +void +test_non_ascii () +{ + char buf[5]; + strcpy (buf, "文字化け"); /* { dg-warning "stack-based buffer overflow" } */ + /* { dg-warning "'__builtin_memcpy' writing 13 bytes into a region of size 5 overflows the destination" "" { target *-*-* } .-1 } */ +} + +/* With unicode support, we should show the printable unicode chars. 
*/ + +/* { dg-begin-multiline-output "" } + + ┌─────┬─────┬─────┬────┬────┐┌────┬────┬────┬────┬────┬────┬────┬──────┐ + │ [0] │ [1] │ [2] │[3] │[4] ││[5] │[6] │[7] │[8] │[9] │[10]│[11]│ [12] │ + ├─────┼─────┼─────┼────┼────┤├────┼────┼────┼────┼────┼────┼────┼──────┤ + │0xe6 │0x96 │0x87 │0xe5│0xad││0x97│0xe5│0x8c│0x96│0xe3│0x81│0x91│ 0x00 │ + ├─────┴─────┴─────┼────┴────┴┴────┼────┴────┴────┼────┴────┴────┼──────┤ + │ U+6587 │ U+5b57 │ U+5316 │ U+3051 │U+0000│ + ├─────────────────┼───────────────┼──────────────┼──────────────┼──────┤ + │ 文 │ 字 │ 化 │ け │ NUL │ + ├─────────────────┴───────────────┴──────────────┴──────────────┴──────┤ + │ string literal (type: 'char[13]') │ + └──────────────────────────────────────────────────────────────────────┘ + │ │ │ │ │ │ │ │ │ │ │ │ │ + │ │ │ │ │ │ │ │ │ │ │ │ │ + v v v v v v v v v v v v v + ┌─────┬────────────────┬────┐┌─────────────────────────────────────────┐ + │ [0] │ ... │[4] ││ │ + ├─────┴────────────────┴────┤│ after valid range │ + │ 'buf' (type: 'char[5]') ││ │ + └───────────────────────────┘└─────────────────────────────────────────┘ + ├─────────────┬─────────────┤├────────────────────┬────────────────────┤ + │ │ + ╭────────┴────────╮ ╭─────────┴─────────╮ + │capacity: 5 bytes│ │overflow of 8 bytes│ + ╰─────────────────╯ ╰───────────────────╯ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-6.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-6.c new file mode 100644 index 0000000..25bf9d5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-6.c @@ -0,0 +1,125 @@ +/* { dg-require-effective-target lp64 } + Misbehaves with -m32 due to optimization turning the pointer arithmetic into: + _2 = &buf + 4294967246; + memcpy (_2, _1, 4096); +*/ + +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */ + +#include <string.h> + +#define LOREM_IPSUM \ + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod" \ + " 
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim" \ + " veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea" \ + " commodo consequat. Duis aute irure dolor in reprehenderit in voluptate" \ + " velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint" \ + " occaecat cupidatat non proident, sunt in culpa qui officia deserunt" \ + " mollit anim id est laborum." + +/* This memcpy reads from both before and after the bounds of the + string literal, and writes to both before and after the bounds of "buf". */ + +void +test_bad_memcpy () +{ + char buf[100]; + memcpy (buf - 50, LOREM_IPSUM - 100, 4096); /* { dg-warning "stack-based buffer overflow" } */ + /* { dg-warning "stack-based buffer underwrite" "" { target *-*-* } .-1 } */ + /* { dg-warning "buffer under-read" "" { target *-*-* } .-2 } */ + /* { dg-warning "buffer over-read" "" { target *-*-* } .-3 } */ + /* { dg-warning "'memcpy' writing 4096 bytes into a region of size 0 overflows the destination" "" { target *-*-* } .-4 } */ +} + +/* { dg-begin-multiline-output "" } + + ┌─────────────────────────────────────────────────────────────────────────┐ + │ read of 4096 bytes │ + └─────────────────────────────────────────────────────────────────────────┘ + ^ ^ ^ ^ ^ + │ │ │ │ │ + │ │ │ │ │ + ┌──────────────────┐┌──────────┬──────────┬────────────┐┌─────────────────┐ + │ ││ [0] │ ... 
│ [445] ││ │ + │before valid range│├──────────┴──────────┴────────────┤│after valid range│ + │ ││string literal (type: 'char[446]')││ │ + └──────────────────┘└──────────────────────────────────┘└─────────────────┘ + ├────────┬─────────┤├────────────────┬─────────────────┤├────────┬────────┤ + │ │ │ + ╭────────┴──────────────╮ ╭───────┴───────╮ ╭───────────┴───────────╮ + │under-read of 100 bytes│ │size: 446 bytes│ │over-read of 3550 bytes│ + ╰───────────────────────╯ ╰───────────────╯ ╰───────────────────────╯ + + { dg-end-multiline-output "" } */ + +/* { dg-begin-multiline-output "" } + + ┌──────────────────────────────────────────────────────────────────────┐ + │ write of 4096 bytes │ + └──────────────────────────────────────────────────────────────────────┘ + │ │ │ │ │ + │ │ │ │ │ + v v v v v + ┌──────────────────┐┌───────┬───────┬─────────┐┌───────────────────────┐ + │ ││ [0] │ ... │ [99] ││ │ + │before valid range│├───────┴───────┴─────────┤│ after valid range │ + │ ││'buf' (type: 'char[100]')││ │ + └──────────────────┘└─────────────────────────┘└───────────────────────┘ + ├────────┬─────────┤├────────────┬────────────┤├───────────┬───────────┤ + │ │ │ + │ ╭─────────┴─────────╮ ╭───────────┴──────────╮ + │ │capacity: 100 bytes│ │overflow of 3946 bytes│ + │ ╰───────────────────╯ ╰──────────────────────╯ + ╭────────┴─────────────╮ + │underwrite of 50 bytes│ + ╰──────────────────────╯ + + { dg-end-multiline-output "" } */ + +/* The read and write diagrams are each emitted twice: once for the "before" + and once for the "after" diagnostic. */ + +/* { dg-begin-multiline-output "" } + + ┌─────────────────────────────────────────────────────────────────────────┐ + │ read of 4096 bytes │ + └─────────────────────────────────────────────────────────────────────────┘ + ^ ^ ^ ^ ^ + │ │ │ │ │ + │ │ │ │ │ + ┌──────────────────┐┌──────────┬──────────┬────────────┐┌─────────────────┐ + │ ││ [0] │ ... 
│ [445] ││ │ + │before valid range│├──────────┴──────────┴────────────┤│after valid range│ + │ ││string literal (type: 'char[446]')││ │ + └──────────────────┘└──────────────────────────────────┘└─────────────────┘ + ├────────┬─────────┤├────────────────┬─────────────────┤├────────┬────────┤ + │ │ │ + ╭────────┴──────────────╮ ╭───────┴───────╮ ╭───────────┴───────────╮ + │under-read of 100 bytes│ │size: 446 bytes│ │over-read of 3550 bytes│ + ╰───────────────────────╯ ╰───────────────╯ ╰───────────────────────╯ + + { dg-end-multiline-output "" } */ + +/* { dg-begin-multiline-output "" } + + ┌──────────────────────────────────────────────────────────────────────┐ + │ write of 4096 bytes │ + └──────────────────────────────────────────────────────────────────────┘ + │ │ │ │ │ + │ │ │ │ │ + v v v v v + ┌──────────────────┐┌───────┬───────┬─────────┐┌───────────────────────┐ + │ ││ [0] │ ... │ [99] ││ │ + │before valid range│├───────┴───────┴─────────┤│ after valid range │ + │ ││'buf' (type: 'char[100]')││ │ + └──────────────────┘└─────────────────────────┘└───────────────────────┘ + ├────────┬─────────┤├────────────┬────────────┤├───────────┬───────────┤ + │ │ │ + │ ╭─────────┴─────────╮ ╭───────────┴──────────╮ + │ │capacity: 100 bytes│ │overflow of 3946 bytes│ + │ ╰───────────────────╯ ╰──────────────────────╯ + ╭────────┴─────────────╮ + │underwrite of 50 bytes│ + ╰──────────────────────╯ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-7.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-7.c new file mode 100644 index 0000000..25a9acc --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-7.c @@ -0,0 +1,36 @@ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */ + +#include <string.h> + +void +test_string_with_control_chars () +{ + char buf[8]; + strcpy (buf, "\tone\n\ttwo\n"); /* { dg-warning "stack-based buffer overflow" } */ + /* { dg-warning "'__builtin_memcpy' 
writing 11 bytes into a region of size 8 overflows the destination" "" { target *-*-* } .-1 } */ +} + +/* { dg-begin-multiline-output "" } + + ┌──────┬──────┬──────┬─────┬─────┬─────┬─────┬─────┐┌─────┬─────┬──────┐ + │ [0] │ [1] │ [2] │ [3] │ [4] │ [5] │ [6] │ [7] ││ [8] │ [9] │ [10] │ + ├──────┼──────┼──────┼─────┼─────┼─────┼─────┼─────┤├─────┼─────┼──────┤ + │ 0x09 │ 'o' │ 'n' │ 'e' │0x0a │0x09 │ 't' │ 'w' ││ 'o' │0x0a │ NUL │ + ├──────┴──────┴──────┴─────┴─────┴─────┴─────┴─────┴┴─────┴─────┴──────┤ + │ string literal (type: 'char[11]') │ + └──────────────────────────────────────────────────────────────────────┘ + │ │ │ │ │ │ │ │ │ │ │ + │ │ │ │ │ │ │ │ │ │ │ + v v v v v v v v v v v + ┌──────┬─────────────────────────────────────┬─────┐┌──────────────────┐ + │ [0] │ ... │ [7] ││ │ + ├──────┴─────────────────────────────────────┴─────┤│after valid range │ + │ 'buf' (type: 'char[8]') ││ │ + └──────────────────────────────────────────────────┘└──────────────────┘ + ├────────────────────────┬─────────────────────────┤├────────┬─────────┤ + │ │ + ╭────────┴────────╮ ╭─────────┴─────────╮ + │capacity: 8 bytes│ │overflow of 3 bytes│ + ╰─────────────────╯ ╰───────────────────╯ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-8.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-8.c new file mode 100644 index 0000000..24d8735 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-8.c @@ -0,0 +1,34 @@ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */ + +#include <stdlib.h> +#include <stdint.h> + +/* Gap of 4, then an overflow of 4. 
*/ + +void test2 (size_t size) +{ + int32_t *buf = __builtin_malloc (size * sizeof(int32_t)); /* { dg-message "\\(1\\) capacity: 'size \\* 4' bytes" } */ + if (!buf) return; + + buf[size + 1] = 42; /* { dg-warning "heap-based buffer overflow" } */ + __builtin_free (buf); +} + +/* { dg-begin-multiline-output "" } + + ┌───────────────────┐ + │write of '(int) 42'│ + └───────────────────┘ + │ + │ + v + ┌───────────────────────────────┐ ┌───────────────────┐ + │buffer allocated on heap at (1)│ │ after valid range │ + └───────────────────────────────┘ └───────────────────┘ + ├───────────────┬───────────────┤├───────┬────────┤├─────────┬─────────┤ + │ │ │ + ╭─────────────┴────────────╮ ╭───┴───╮ ╭─────────┴─────────╮ + │capacity: 'size * 4' bytes│ │4 bytes│ │overflow of 4 bytes│ + ╰──────────────────────────╯ ╰───────╯ ╰───────────────────╯ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-9.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-9.c new file mode 100644 index 0000000..bb9ad66 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-9.c @@ -0,0 +1,42 @@ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */ + +#include <stdint.h> + +struct st +{ + char buf[16]; + int32_t x; + int32_t y; +}; + +struct st arr[10]; + +int32_t struct_arr_read_x_element_before_start_far(void) +{ + return arr[-100].x; /* { dg-warning "buffer under-read" "warning" } */ + /* { dg-message "out-of-bounds read from byte -2384 till byte -2381 but 'arr' starts at byte 0" "final event" { target *-*-* } .-1 } */ + /* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } .-2 } */ +} + +// TODO: show index of accessed element +// TODO: show field of accessed element +/* { dg-begin-multiline-output "" } + + ┌───────────────────────────┐ + │read of 'int32_t' (4 bytes)│ + └───────────────────────────┘ + ^ + │ + │ + ┌───────────────────────────┐ 
┌─────────┬─────────┬─────────┐ + │ │ │ [0] │ ... │ [9] │ + │ before valid range │ ├─────────┴─────────┴─────────┤ + │ │ │'arr' (type: 'struct st[10]')│ + └───────────────────────────┘ └─────────────────────────────┘ + ├─────────────┬─────────────┤├────┬─────┤├──────────────┬──────────────┤ + │ │ │ + ╭──────────┴──────────╮ ╭─────┴────╮ ╭───────┴───────╮ + │under-read of 4 bytes│ │2380 bytes│ │size: 240 bytes│ + ╰─────────────────────╯ ╰──────────╯ ╰───────────────╯ + + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/pattern-test-2.c b/gcc/testsuite/gcc.dg/analyzer/pattern-test-2.c index 7c8d1b3..5b8ff7b 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pattern-test-2.c +++ b/gcc/testsuite/gcc.dg/analyzer/pattern-test-2.c @@ -26,7 +26,7 @@ void test_2 (void *p, void *q) foo(p); /* { dg-warning "pattern match on 'p != 0'" "p != 0" { target *-*-* } cond_2 } */ - /* { dg-warning "pattern match on 'tmp1 | tmp2 != 0'" "tmp1 | tmp2 != 0" { target *-*-* } cond_2 } */ + /* { dg-warning "pattern match on 'p == 0 | q == 0 != 0'" "tmp1 | tmp2 != 0" { target *-*-* } cond_2 } */ /* { dg-warning "pattern match on 'q != 0'" "q != 0" { target *-*-* } cond_2 } */ } @@ -42,6 +42,6 @@ void test_3 (void *p, void *q) foo(p); /* { dg-warning "pattern match on 'p == 0'" "p == 0" { target *-*-* } cond_3 } */ - /* { dg-warning "pattern match on 'tmp1 & tmp2 == 0'" "tmp1 & tmp2 == 0" { target *-*-* } cond_3 } */ + /* { dg-warning "pattern match on 'p == 0 & q == 0 == 0'" "tmp1 & tmp2 == 0" { target *-*-* } cond_3 } */ /* { dg-warning "pattern match on 'q == 0'" "q == 0" { target *-*-* } cond_3 } */ } diff --git a/gcc/testsuite/gcc.dg/analyzer/pr101962.c b/gcc/testsuite/gcc.dg/analyzer/pr101962.c index b878aad..5eb7cf0 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pr101962.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr101962.c @@ -16,7 +16,7 @@ maybe_inc_int_ptr (int *ptr) int test_1 (void) { - int stack; /* { dg-message "region created on stack here" } */ + int stack; int *a = 
&stack; a = maybe_inc_int_ptr (a); a = maybe_inc_int_ptr (a); diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_gil_plugin.c b/gcc/testsuite/gcc.dg/plugin/analyzer_gil_plugin.c index e494315..e0fc9cd 100644 --- a/gcc/testsuite/gcc.dg/plugin/analyzer_gil_plugin.c +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_gil_plugin.c @@ -155,7 +155,7 @@ class double_save_thread : public gil_diagnostic return m_call == sub_other.m_call; } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { return warning_at (rich_loc, get_controlling_option (), "nested usage of %qs", "Py_BEGIN_ALLOW_THREADS"); @@ -194,7 +194,7 @@ class fncall_without_gil : public gil_diagnostic && m_arg_idx == sub_other.m_arg_idx); } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { auto_diagnostic_group d; if (m_callee_fndecl) @@ -245,7 +245,7 @@ class pyobject_usage_without_gil : public gil_diagnostic ((const pyobject_usage_without_gil&)base_other).m_expr); } - bool emit (rich_location *rich_loc) final override + bool emit (rich_location *rich_loc, logger *) final override { auto_diagnostic_group d; return warning_at (rich_loc, get_controlling_option (), |