diff options
author | Martin Liska <mliska@suse.cz> | 2022-09-12 10:43:19 +0200 |
---|---|---|
committer | Martin Liska <mliska@suse.cz> | 2022-09-12 10:43:19 +0200 |
commit | fdb97cd0b7d15efa39ba79dca44be93debb0ef12 (patch) | |
tree | 65a6d95503fb9897bda29c72a629e57bb773d1c1 /gcc/analyzer | |
parent | 918bc838c2803f08e4d7ccd179396d48cb8ec804 (diff) | |
parent | 643ae816f17745a77b62188b6bf169211609a59b (diff) | |
download | gcc-fdb97cd0b7d15efa39ba79dca44be93debb0ef12.zip gcc-fdb97cd0b7d15efa39ba79dca44be93debb0ef12.tar.gz gcc-fdb97cd0b7d15efa39ba79dca44be93debb0ef12.tar.bz2 |
Merge branch 'master' into devel/sphinx
Diffstat (limited to 'gcc/analyzer')
-rw-r--r-- | gcc/analyzer/ChangeLog | 133 | ||||
-rw-r--r-- | gcc/analyzer/analyzer.h | 36 | ||||
-rw-r--r-- | gcc/analyzer/analyzer.opt | 4 | ||||
-rw-r--r-- | gcc/analyzer/checker-path.cc | 91 | ||||
-rw-r--r-- | gcc/analyzer/checker-path.h | 34 | ||||
-rw-r--r-- | gcc/analyzer/diagnostic-manager.cc | 31 | ||||
-rw-r--r-- | gcc/analyzer/engine.cc | 16 | ||||
-rw-r--r-- | gcc/analyzer/known-function-manager.cc | 78 | ||||
-rw-r--r-- | gcc/analyzer/known-function-manager.h | 45 | ||||
-rw-r--r-- | gcc/analyzer/region-model-impl-calls.cc | 50 | ||||
-rw-r--r-- | gcc/analyzer/region-model-manager.cc | 3 | ||||
-rw-r--r-- | gcc/analyzer/region-model.cc | 1143 | ||||
-rw-r--r-- | gcc/analyzer/region-model.h | 41 | ||||
-rw-r--r-- | gcc/analyzer/region.cc | 131 | ||||
-rw-r--r-- | gcc/analyzer/region.h | 17 | ||||
-rw-r--r-- | gcc/analyzer/sm-taint.cc | 27 | ||||
-rw-r--r-- | gcc/analyzer/store.cc | 30 | ||||
-rw-r--r-- | gcc/analyzer/store.h | 12 |
18 files changed, 1797 insertions, 125 deletions
diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 3ad3096..ea6d5ee 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,136 @@ +2022-09-11 Tim Lange <mail@tim-lange.me> + + PR analyzer/106845 + * region-model.cc (region_model::check_region_bounds): + Bail out if 0 bytes were accessed. + * store.cc (byte_range::dump_to_pp): + Add special case for empty ranges. + (byte_range::exceeds_p): Restrict to non-empty ranges. + (byte_range::falls_short_of_p): Restrict to non-empty ranges. + * store.h (bit_range::empty_p): New function. + (bit_range::get_last_byte_offset): Restrict to non-empty ranges. + (byte_range::empty_p): New function. + (byte_range::get_last_byte_offset): Restrict to non-empty ranges. + +2022-09-09 David Malcolm <dmalcolm@redhat.com> + + * analyzer.opt (Wanalyzer-exposure-through-uninit-copy): New. + * checker-path.cc (region_creation_event::region_creation_event): + Add "capacity" and "kind" params. + (region_creation_event::get_desc): Generalize to different kinds + of event. + (checker_path::add_region_creation_event): Convert to... + (checker_path::add_region_creation_events): ...this. + * checker-path.h (enum rce_kind): New. + (region_creation_event::region_creation_event): Add "capacity" and + "kind" params. + (region_creation_event::m_capacity): New field. + (region_creation_event::m_rce_kind): New field. + (checker_path::add_region_creation_event): Convert to... + (checker_path::add_region_creation_events): ...this. + * diagnostic-manager.cc (diagnostic_manager::build_emission_path): + Update for multiple region creation events. + (diagnostic_manager::add_event_on_final_node): Likewise. + (diagnostic_manager::add_events_for_eedge): Likewise. + * region-model-impl-calls.cc (call_details::get_logger): New. + * region-model.cc: Define INCLUDE_MEMORY before including + "system.h". Include "gcc-rich-location.h". + (class record_layout): New. + (class exposure_through_uninit_copy): New. 
+ (contains_uninit_p): New. + (region_model::maybe_complain_about_infoleak): New. + * region-model.h (call_details::get_logger): New decl. + (region_model::maybe_complain_about_infoleak): New decl. + (region_model::mark_as_tainted): New decl. + * sm-taint.cc (region_model::mark_as_tainted): New. + +2022-09-09 David Malcolm <dmalcolm@redhat.com> + + * analyzer.h (class known_function_manager): New forward decl. + (class known_function): New. + (plugin_analyzer_init_iface::register_known_function): New. + * engine.cc: Include "analyzer/known-function-manager.h". + (plugin_analyzer_init_impl::plugin_analyzer_init_impl): Add + known_fn_mgr param. + (plugin_analyzer_init_impl::register_state_machine): Add + LOC_SCOPE. + (plugin_analyzer_init_impl::register_known_function): New. + (plugin_analyzer_init_impl::m_known_fn_mgr): New. + (impl_run_checkers): Update plugin callback invocation to use + eng's known_function_manager. + * known-function-manager.cc: New file. + * known-function-manager.h: New file. + * region-model-manager.cc + (region_model_manager::region_model_manager): Pass logger to + m_known_fn_mgr's ctor. + * region-model.cc (region_model::update_for_zero_return): New. + (region_model::update_for_nonzero_return): New. + (maybe_simplify_upper_bound): New. + (region_model::maybe_get_copy_bounds): New. + (region_model::get_known_function): New. + (region_model::on_call_pre): Handle plugin-supplied known + functions. + * region-model.h: Include "analyzer/known-function-manager.h". + (region_model_manager::get_known_function_manager): New. + (region_model_manager::m_known_fn_mgr): New. + (call_details::get_model): New accessor. + (region_model::maybe_get_copy_bounds): New decl. + (region_model::update_for_zero_return): New decl. + (region_model::update_for_nonzero_return): New decl. + (region_model::get_known_function): New decl. + (region_model::get_known_function_manager): New. 
+ +2022-09-08 Tim Lange <mail@tim-lange.me> + + PR analyzer/106625 + * analyzer.h (region_offset): Eliminate m_is_symbolic member. + * region-model-impl-calls.cc (region_model::impl_call_realloc): + Refine implementation to be more precise. + * region-model.cc (class symbolic_past_the_end): + Abstract diagnostic class to complain about accesses past the end + with symbolic values. + (class symbolic_buffer_overflow): + Concrete diagnostic class to complain about buffer overflows with + symbolic values. + (class symbolic_buffer_overread): + Concrete diagnostic class to complain about buffer overreads with + symbolic values. + (region_model::check_symbolic_bounds): New function. + (maybe_get_integer_cst_tree): New helper function. + (region_model::check_region_bounds): + Add call to check_symbolic_bounds if offset is not concrete. + (region_model::eval_condition_without_cm): + Add support for EQ_EXPR and GT_EXPR with binaryop_svalues. + (is_positive_svalue): New helper function. + (region_model::symbolic_greater_than): + New function to handle GT_EXPR comparisons with symbolic values. + (region_model::structural_equality): New function to compare + whether two svalues are structured the same, i.e. evaluate to + the same value. + (test_struct): Reflect changes to region::calc_offset. + (test_var): Likewise. + (test_array_2): Likewise and add selftest with symbolic i. + * region-model.h (class region_model): Add check_symbolic_bounds, + symbolic_greater_than and structural_equality. + * region.cc (region::get_offset): + Reflect changes to region::calc_offset. + (region::calc_offset): + Compute the symbolic offset if the offset is not concrete. + (region::get_relative_symbolic_offset): New function to return the + symbolic offset in bytes relative to its parent. + (field_region::get_relative_symbolic_offset): Likewise. + (element_region::get_relative_symbolic_offset): Likewise. + (offset_region::get_relative_symbolic_offset): Likewise. 
+ (bit_range_region::get_relative_symbolic_offset): Likewise. + * region.h: Add get_relative_symbolic_offset. + * store.cc (binding_key::make): + Reflect changes to region::calc_offset. + (binding_map::apply_ctor_val_to_range): Likewise. + (binding_map::apply_ctor_pair_to_child_region): Likewise. + (binding_cluster::bind_compound_sval): Likewise. + (binding_cluster::get_any_binding): Likewise. + (binding_cluster::maybe_get_compound_binding): Likewise. + 2022-09-05 Tim Lange <mail@tim-lange.me> * region-model-impl-calls.cc (region_model::impl_call_strcpy): diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h index dcefc13..b325aee 100644 --- a/gcc/analyzer/analyzer.h +++ b/gcc/analyzer/analyzer.h @@ -113,6 +113,7 @@ class engine; class state_machine; class logger; class visitor; +class known_function_manager; /* Forward decls of functions. */ @@ -172,16 +173,17 @@ public: static region_offset make_concrete (const region *base_region, bit_offset_t offset) { - return region_offset (base_region, offset, false); + return region_offset (base_region, offset, NULL); } - static region_offset make_symbolic (const region *base_region) + static region_offset make_symbolic (const region *base_region, + const svalue *sym_offset) { - return region_offset (base_region, 0, true); + return region_offset (base_region, 0, sym_offset); } const region *get_base_region () const { return m_base_region; } - bool symbolic_p () const { return m_is_symbolic; } + bool symbolic_p () const { return m_sym_offset != NULL; } bit_offset_t get_bit_offset () const { @@ -189,34 +191,52 @@ public: return m_offset; } + const svalue *get_symbolic_byte_offset () const + { + gcc_assert (symbolic_p ()); + return m_sym_offset; + } + bool operator== (const region_offset &other) const { return (m_base_region == other.m_base_region && m_offset == other.m_offset - && m_is_symbolic == other.m_is_symbolic); + && m_sym_offset == other.m_sym_offset); } private: region_offset (const region *base_region, 
bit_offset_t offset, - bool is_symbolic) - : m_base_region (base_region), m_offset (offset), m_is_symbolic (is_symbolic) + const svalue *sym_offset) + : m_base_region (base_region), m_offset (offset), m_sym_offset (sym_offset) {} const region *m_base_region; bit_offset_t m_offset; - bool m_is_symbolic; + const svalue *m_sym_offset; }; extern location_t get_stmt_location (const gimple *stmt, function *fun); extern bool compat_types_p (tree src_type, tree dst_type); +/* Abstract base class for simulating the behavior of known functions, + supplied by plugins. */ + +class known_function +{ +public: + virtual ~known_function () {} + virtual void impl_call_pre (const call_details &cd) const = 0; +}; + /* Passed by pointer to PLUGIN_ANALYZER_INIT callbacks. */ class plugin_analyzer_init_iface { public: virtual void register_state_machine (state_machine *) = 0; + virtual void register_known_function (const char *name, + known_function *) = 0; virtual logger *get_logger () const = 0; }; diff --git a/gcc/analyzer/analyzer.opt b/gcc/analyzer/analyzer.opt index 437ea92..dbab3b8 100644 --- a/gcc/analyzer/analyzer.opt +++ b/gcc/analyzer/analyzer.opt @@ -70,6 +70,10 @@ Wanalyzer-exposure-through-output-file Common Var(warn_analyzer_exposure_through_output_file) Init(1) Warning Warn about code paths in which sensitive data is written to a file. +Wanalyzer-exposure-through-uninit-copy +Common Var(warn_analyzer_exposure_through_uninit_copy) Init(1) Warning +Warn about code paths in which sensitive data is copied across a security boundary. + Wanalyzer-fd-access-mode-mismatch Common Var(warn_analyzer_fd_mode_mismatch) Init(1) Warning Warn about code paths in which read on a write-only file descriptor is attempted, or vice versa. 
diff --git a/gcc/analyzer/checker-path.cc b/gcc/analyzer/checker-path.cc index 273f40d..22bae2f 100644 --- a/gcc/analyzer/checker-path.cc +++ b/gcc/analyzer/checker-path.cc @@ -288,16 +288,25 @@ statement_event::get_desc (bool) const /* class region_creation_event : public checker_event. */ region_creation_event::region_creation_event (const region *reg, + tree capacity, + enum rce_kind kind, location_t loc, tree fndecl, int depth) : checker_event (EK_REGION_CREATION, loc, fndecl, depth), - m_reg (reg) + m_reg (reg), + m_capacity (capacity), + m_rce_kind (kind) { + if (m_rce_kind == RCE_CAPACITY) + gcc_assert (capacity); } /* Implementation of diagnostic_event::get_desc vfunc for - region_creation_event. */ + region_creation_event. + There are effectively 3 kinds of region_creation_event, to + avoid combinatorial explosion by trying to convey the + information in a single message. */ label_text region_creation_event::get_desc (bool can_colorize) const @@ -311,14 +320,50 @@ region_creation_event::get_desc (bool can_colorize) const return custom_desc; } - switch (m_reg->get_memory_space ()) + switch (m_rce_kind) { default: - return label_text::borrow ("region created here"); - case MEMSPACE_STACK: - return label_text::borrow ("region created on stack here"); - case MEMSPACE_HEAP: - return label_text::borrow ("region created on heap here"); + gcc_unreachable (); + + case RCE_MEM_SPACE: + switch (m_reg->get_memory_space ()) + { + default: + return label_text::borrow ("region created here"); + case MEMSPACE_STACK: + return label_text::borrow ("region created on stack here"); + case MEMSPACE_HEAP: + return label_text::borrow ("region created on heap here"); + } + break; + + case RCE_CAPACITY: + gcc_assert (m_capacity); + if (TREE_CODE (m_capacity) == INTEGER_CST) + { + unsigned HOST_WIDE_INT hwi = tree_to_uhwi (m_capacity); + if (hwi == 1) + return make_label_text (can_colorize, + "capacity: %wu byte", hwi); + else + return make_label_text (can_colorize, + "capacity: %wu
bytes", hwi); + } + else + return make_label_text (can_colorize, + "capacity: %qE bytes", m_capacity); + + case RCE_DEBUG: + { + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + pp_string (&pp, "region creation: "); + m_reg->dump_to_pp (&pp, true); + if (m_capacity) + pp_printf (&pp, " capacity: %qE", m_capacity); + return label_text::take (xstrdup (pp_formatted_text (&pp))); + } + break; } } @@ -1207,15 +1252,33 @@ checker_path::debug () const } } -/* Add region_creation_event instance to this path for REG, - describing whether REG is on the stack or heap. */ +/* Add region_creation_event instances to this path for REG, + describing whether REG is on the stack or heap and what + its capacity is (if known). + If DEBUG is true, also create an RCE_DEBUG event. */ void -checker_path::add_region_creation_event (const region *reg, - location_t loc, - tree fndecl, int depth) +checker_path::add_region_creation_events (const region *reg, + const region_model *model, + location_t loc, + tree fndecl, int depth, + bool debug) { - add_event (new region_creation_event (reg, loc, fndecl, depth)); + tree capacity = NULL_TREE; + if (model) + if (const svalue *capacity_sval = model->get_capacity (reg)) + capacity = model->get_representative_tree (capacity_sval); + + add_event (new region_creation_event (reg, capacity, RCE_MEM_SPACE, + loc, fndecl, depth)); + + if (capacity) + add_event (new region_creation_event (reg, capacity, RCE_CAPACITY, + loc, fndecl, depth)); + + if (debug) + add_event (new region_creation_event (reg, capacity, RCE_DEBUG, + loc, fndecl, depth)); } /* Add a warning_event to the end of this path. 
*/ diff --git a/gcc/analyzer/checker-path.h b/gcc/analyzer/checker-path.h index 8e48d8a..5d00934 100644 --- a/gcc/analyzer/checker-path.h +++ b/gcc/analyzer/checker-path.h @@ -210,19 +210,43 @@ public: const program_state m_dst_state; }; +/* There are too many combinations to express region creation in one message, + so we emit multiple region_creation_event instances when each pertinent + region is created. + + This enum distinguishes between the different messages. */ + +enum rce_kind +{ + /* Generate a message based on the memory space of the region + e.g. "region created on stack here". */ + RCE_MEM_SPACE, + + /* Generate a message based on the capacity of the region + e.g. "capacity: 100 bytes". */ + RCE_CAPACITY, + + /* Generate a debug message. */ + RCE_DEBUG +}; + /* A concrete event subclass describing the creation of a region that - is significant for a diagnostic e.g. "region created on stack here". */ + is significant for a diagnostic. */ class region_creation_event : public checker_event { public: region_creation_event (const region *reg, + tree capacity, + enum rce_kind kind, location_t loc, tree fndecl, int depth); label_text get_desc (bool can_colorize) const final override; private: const region *m_reg; + tree m_capacity; + enum rce_kind m_rce_kind; }; /* An event subclass describing the entry to a function. 
*/ @@ -632,9 +656,11 @@ public: m_events[idx] = new_event; } - void add_region_creation_event (const region *reg, - location_t loc, - tree fndecl, int depth); + void add_region_creation_events (const region *reg, + const region_model *model, + location_t loc, + tree fndecl, int depth, + bool debug); void add_final_event (const state_machine *sm, const exploded_node *enode, const gimple *stmt, diff --git a/gcc/analyzer/diagnostic-manager.cc b/gcc/analyzer/diagnostic-manager.cc index fded828..2d185a1 100644 --- a/gcc/analyzer/diagnostic-manager.cc +++ b/gcc/analyzer/diagnostic-manager.cc @@ -1460,11 +1460,12 @@ diagnostic_manager::build_emission_path (const path_builder &pb, if (DECL_P (decl) && DECL_SOURCE_LOCATION (decl) != UNKNOWN_LOCATION) { - emission_path->add_region_creation_event - (reg, + emission_path->add_region_creation_events + (reg, NULL, DECL_SOURCE_LOCATION (decl), NULL_TREE, - 0); + 0, + m_verbosity > 3); } } } @@ -1524,11 +1525,13 @@ diagnostic_manager::add_event_on_final_node (const exploded_node *final_enode, break; case RK_HEAP_ALLOCATED: case RK_ALLOCA: - emission_path->add_region_creation_event + emission_path->add_region_creation_events (reg, - src_point.get_location (), - src_point.get_fndecl (), - src_stack_depth); + dst_model, + src_point.get_location (), + src_point.get_fndecl (), + src_stack_depth, + false); emitted = true; break; } @@ -1939,11 +1942,12 @@ diagnostic_manager::add_events_for_eedge (const path_builder &pb, if (DECL_P (decl) && DECL_SOURCE_LOCATION (decl) != UNKNOWN_LOCATION) { - emission_path->add_region_creation_event - (reg, + emission_path->add_region_creation_events + (reg, dst_state.m_region_model, DECL_SOURCE_LOCATION (decl), dst_point.get_fndecl (), - dst_stack_depth); + dst_stack_depth, + m_verbosity > 3); } } } @@ -2033,11 +2037,12 @@ diagnostic_manager::add_events_for_eedge (const path_builder &pb, break; case RK_HEAP_ALLOCATED: case RK_ALLOCA: - emission_path->add_region_creation_event - (reg, + 
emission_path->add_region_creation_events + (reg, dst_model, src_point.get_location (), src_point.get_fndecl (), - src_stack_depth); + src_stack_depth, + m_verbosity > 3); break; } } diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index e8db00d..742ac02 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -71,6 +71,7 @@ along with GCC; see the file COPYING3. If not see #include "stringpool.h" #include "attribs.h" #include "tree-dfa.h" +#include "analyzer/known-function-manager.h" /* For an overview, see gcc/doc/analyzer.texi. */ @@ -5813,16 +5814,26 @@ class plugin_analyzer_init_impl : public plugin_analyzer_init_iface { public: plugin_analyzer_init_impl (auto_delete_vec <state_machine> *checkers, + known_function_manager *known_fn_mgr, logger *logger) : m_checkers (checkers), + m_known_fn_mgr (known_fn_mgr), m_logger (logger) {} void register_state_machine (state_machine *sm) final override { + LOG_SCOPE (m_logger); m_checkers->safe_push (sm); } + void register_known_function (const char *name, + known_function *kf) final override + { + LOG_SCOPE (m_logger); + m_known_fn_mgr->add (name, kf); + } + logger *get_logger () const final override { return m_logger; @@ -5830,6 +5841,7 @@ public: private: auto_delete_vec <state_machine> *m_checkers; + known_function_manager *m_known_fn_mgr; logger *m_logger; }; @@ -5885,7 +5897,9 @@ impl_run_checkers (logger *logger) auto_delete_vec <state_machine> checkers; make_checkers (checkers, logger); - plugin_analyzer_init_impl data (&checkers, logger); + plugin_analyzer_init_impl data (&checkers, + eng.get_known_function_manager (), + logger); invoke_plugin_callbacks (PLUGIN_ANALYZER_INIT, &data); if (logger) diff --git a/gcc/analyzer/known-function-manager.cc b/gcc/analyzer/known-function-manager.cc new file mode 100644 index 0000000..f0fd4fc --- /dev/null +++ b/gcc/analyzer/known-function-manager.cc @@ -0,0 +1,78 @@ +/* Support for plugin-supplied behaviors of known functions. 
+ Copyright (C) 2022 Free Software Foundation, Inc. + Contributed by David Malcolm <dmalcolm@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tree.h" +#include "function.h" +#include "analyzer/analyzer.h" +#include "diagnostic-core.h" +#include "analyzer/analyzer-logging.h" +#include "stringpool.h" +#include "analyzer/known-function-manager.h" + +#if ENABLE_ANALYZER + +namespace ana { + +/* class known_function_manager : public log_user. */ + +known_function_manager::known_function_manager (logger *logger) +: log_user (logger) +{ +} + +known_function_manager::~known_function_manager () +{ + /* Delete all owned kfs. 
*/ + for (auto iter : m_map_id_to_kf) + delete iter.second; +} + +void +known_function_manager::add (const char *name, known_function *kf) +{ + LOG_FUNC_1 (get_logger (), "registering %s", name); + tree id = get_identifier (name); + m_map_id_to_kf.put (id, kf); +} + +const known_function * +known_function_manager::get_by_identifier (tree identifier) +{ + known_function **slot = m_map_id_to_kf.get (identifier); + if (slot) + return *slot; + else + return NULL; +} + +const known_function * +known_function_manager::get_by_fndecl (tree fndecl) +{ + if (tree identifier = DECL_NAME (fndecl)) + return get_by_identifier (identifier); + return NULL; +} + +} // namespace ana + +#endif /* #if ENABLE_ANALYZER */ diff --git a/gcc/analyzer/known-function-manager.h b/gcc/analyzer/known-function-manager.h new file mode 100644 index 0000000..fbde853 --- /dev/null +++ b/gcc/analyzer/known-function-manager.h @@ -0,0 +1,45 @@ +/* Support for plugin-supplied behaviors of known functions. + Copyright (C) 2022 Free Software Foundation, Inc. + Contributed by David Malcolm <dmalcolm@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +#ifndef GCC_ANALYZER_KNOWN_FUNCTION_MANAGER_H +#define GCC_ANALYZER_KNOWN_FUNCTION_MANAGER_H + +namespace ana { + +class known_function_manager : public log_user +{ +public: + known_function_manager (logger *logger); + ~known_function_manager (); + void add (const char *name, known_function *kf); + const known_function *get_by_identifier (tree identifier); + const known_function *get_by_fndecl (tree fndecl); + +private: + DISABLE_COPY_AND_ASSIGN (known_function_manager); + + /* Map from identifier to known_function instance. + Has ownership of the latter. */ + hash_map<tree, known_function *> m_map_id_to_kf; +}; + +} // namespace ana + +#endif /* GCC_ANALYZER_KNOWN_FUNCTION_MANAGER_H */ diff --git a/gcc/analyzer/region-model-impl-calls.cc b/gcc/analyzer/region-model-impl-calls.cc index 3790eaf..71fb277 100644 --- a/gcc/analyzer/region-model-impl-calls.cc +++ b/gcc/analyzer/region-model-impl-calls.cc @@ -91,6 +91,17 @@ call_details::get_manager () const return m_model->get_manager (); } +/* Get any logger associated with this object. */ + +logger * +call_details::get_logger () const +{ + if (m_ctxt) + return m_ctxt->get_logger (); + else + return NULL; +} + /* Get any uncertainty_t associated with the region_model_context. */ uncertainty_t * @@ -850,7 +861,7 @@ region_model::impl_call_realloc (const call_details &cd) if (old_size_sval) { const svalue *copied_size_sval - = get_copied_size (old_size_sval, new_size_sval); + = get_copied_size (model, old_size_sval, new_size_sval); const region *copied_old_reg = model->m_mgr->get_sized_region (freed_reg, NULL, copied_size_sval); @@ -896,35 +907,22 @@ region_model::impl_call_realloc (const call_details &cd) private: /* Return the lesser of OLD_SIZE_SVAL and NEW_SIZE_SVAL. - If either one is symbolic, the symbolic svalue is returned. */ - const svalue *get_copied_size (const svalue *old_size_sval, + If unknown, OLD_SIZE_SVAL is returned. 
*/ + const svalue *get_copied_size (region_model *model, + const svalue *old_size_sval, const svalue *new_size_sval) const { - tree old_size_cst = old_size_sval->maybe_get_constant (); - tree new_size_cst = new_size_sval->maybe_get_constant (); - - if (old_size_cst && new_size_cst) + tristate res + = model->eval_condition (old_size_sval, GT_EXPR, new_size_sval); + switch (res.get_value ()) { - /* Both are constants and comparable. */ - tree cmp = fold_binary (LT_EXPR, boolean_type_node, - old_size_cst, new_size_cst); - - if (cmp == boolean_true_node) - return old_size_sval; - else - return new_size_sval; - } - else if (new_size_cst) - { - /* OLD_SIZE_SVAL is symbolic, so return that. */ - return old_size_sval; - } - else - { - /* NEW_SIZE_SVAL is symbolic or both are symbolic. - Return NEW_SIZE_SVAL, because implementations of realloc - probably only moves the buffer if the new size is larger. */ + case tristate::TS_TRUE: return new_size_sval; + case tristate::TS_FALSE: + case tristate::TS_UNKNOWN: + return old_size_sval; + default: + gcc_unreachable (); } } }; diff --git a/gcc/analyzer/region-model-manager.cc b/gcc/analyzer/region-model-manager.cc index 17713b0..cbda77f 100644 --- a/gcc/analyzer/region-model-manager.cc +++ b/gcc/analyzer/region-model-manager.cc @@ -81,7 +81,8 @@ region_model_manager::region_model_manager (logger *logger) m_globals_region (alloc_region_id (), &m_root_region), m_globals_map (), m_store_mgr (this), - m_range_mgr (new bounded_ranges_manager ()) + m_range_mgr (new bounded_ranges_manager ()), + m_known_fn_mgr (logger) { } diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index e84087a..22c5287 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -19,6 +19,7 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. 
*/ #include "config.h" +#define INCLUDE_MEMORY #include "system.h" #include "coretypes.h" #include "tree.h" @@ -74,6 +75,7 @@ along with GCC; see the file COPYING3. If not see #include "ssa-iterators.h" #include "calls.h" #include "is-a.h" +#include "gcc-rich-location.h" #if ENABLE_ANALYZER @@ -1268,7 +1270,7 @@ region_model::on_stmt_pre (const gimple *stmt, } } -/* Abstract base class for all out-of-bounds warnings. */ +/* Abstract base class for all out-of-bounds warnings with concrete values. */ class out_of_bounds : public pending_diagnostic_subclass<out_of_bounds> { @@ -1591,49 +1593,288 @@ public: } }; +/* Abstract class to complain about out-of-bounds read/writes where + the values are symbolic. */ + +class symbolic_past_the_end + : public pending_diagnostic_subclass<symbolic_past_the_end> +{ +public: + symbolic_past_the_end (const region *reg, tree diag_arg, tree offset, + tree num_bytes, tree capacity) + : m_reg (reg), m_diag_arg (diag_arg), m_offset (offset), + m_num_bytes (num_bytes), m_capacity (capacity) + {} + + const char *get_kind () const final override + { + return "symbolic_past_the_end"; + } + + bool operator== (const symbolic_past_the_end &other) const + { + return m_reg == other.m_reg + && pending_diagnostic::same_tree_p (m_diag_arg, other.m_diag_arg) + && pending_diagnostic::same_tree_p (m_offset, other.m_offset) + && pending_diagnostic::same_tree_p (m_num_bytes, other.m_num_bytes) + && pending_diagnostic::same_tree_p (m_capacity, other.m_capacity); + } + + int get_controlling_option () const final override + { + return OPT_Wanalyzer_out_of_bounds; + } + + void mark_interesting_stuff (interesting_t *interest) final override + { + interest->add_region_creation (m_reg); + } + + label_text + describe_region_creation_event (const evdesc::region_creation &ev) final + override + { + if (m_capacity) + return ev.formatted_print ("capacity is %qE bytes", m_capacity); + + return label_text (); + } + + label_text + describe_final_event (const 
evdesc::final_event &ev) final override + { + const char *byte_str; + if (pending_diagnostic::same_tree_p (m_num_bytes, integer_one_node)) + byte_str = "byte"; + else + byte_str = "bytes"; + + if (m_offset) + { + if (m_num_bytes && TREE_CODE (m_num_bytes) == INTEGER_CST) + { + if (m_diag_arg) + return ev.formatted_print ("%s of %E %s at offset %qE" + " exceeds %qE", m_dir_str, + m_num_bytes, byte_str, + m_offset, m_diag_arg); + else + return ev.formatted_print ("%s of %E %s at offset %qE" + " exceeds the buffer", m_dir_str, + m_num_bytes, byte_str, m_offset); + } + else if (m_num_bytes) + { + if (m_diag_arg) + return ev.formatted_print ("%s of %qE %s at offset %qE" + " exceeds %qE", m_dir_str, + m_num_bytes, byte_str, + m_offset, m_diag_arg); + else + return ev.formatted_print ("%s of %qE %s at offset %qE" + " exceeds the buffer", m_dir_str, + m_num_bytes, byte_str, m_offset); + } + else + { + if (m_diag_arg) + return ev.formatted_print ("%s at offset %qE exceeds %qE", + m_dir_str, m_offset, m_diag_arg); + else + return ev.formatted_print ("%s at offset %qE exceeds the" + " buffer", m_dir_str, m_offset); + } + } + if (m_diag_arg) + return ev.formatted_print ("out-of-bounds %s on %qE", + m_dir_str, m_diag_arg); + return ev.formatted_print ("out-of-bounds %s", m_dir_str); + } + +protected: + const region *m_reg; + tree m_diag_arg; + tree m_offset; + tree m_num_bytes; + tree m_capacity; + const char *m_dir_str; +}; + +/* Concrete subclass to complain about overflows with symbolic values. 
*/ + +class symbolic_buffer_overflow : public symbolic_past_the_end +{ +public: + symbolic_buffer_overflow (const region *reg, tree diag_arg, tree offset, + tree num_bytes, tree capacity) + : symbolic_past_the_end (reg, diag_arg, offset, num_bytes, capacity) + { + m_dir_str = "write"; + } + + bool emit (rich_location *rich_loc) final override + { + diagnostic_metadata m; + switch (m_reg->get_memory_space ()) + { + default: + m.add_cwe (787); + return warning_meta (rich_loc, m, get_controlling_option (), + "buffer overflow"); + case MEMSPACE_STACK: + m.add_cwe (121); + return warning_meta (rich_loc, m, get_controlling_option (), + "stack-based buffer overflow"); + case MEMSPACE_HEAP: + m.add_cwe (122); + return warning_meta (rich_loc, m, get_controlling_option (), + "heap-based buffer overflow"); + } + } +}; + +/* Concrete subclass to complain about overreads with symbolic values. */ + +class symbolic_buffer_overread : public symbolic_past_the_end +{ +public: + symbolic_buffer_overread (const region *reg, tree diag_arg, tree offset, + tree num_bytes, tree capacity) + : symbolic_past_the_end (reg, diag_arg, offset, num_bytes, capacity) + { + m_dir_str = "read"; + } + + bool emit (rich_location *rich_loc) final override + { + diagnostic_metadata m; + m.add_cwe (126); + return warning_meta (rich_loc, m, get_controlling_option (), + "buffer overread"); + } +}; + +/* Check whether an access is past the end of the BASE_REG. 
*/ + +void region_model::check_symbolic_bounds (const region *base_reg, + const svalue *sym_byte_offset, + const svalue *num_bytes_sval, + const svalue *capacity, + enum access_direction dir, + region_model_context *ctxt) const +{ + gcc_assert (ctxt); + + const svalue *next_byte + = m_mgr->get_or_create_binop (num_bytes_sval->get_type (), PLUS_EXPR, + sym_byte_offset, num_bytes_sval); + + if (eval_condition_without_cm (next_byte, GT_EXPR, capacity).is_true ()) + { + tree diag_arg = get_representative_tree (base_reg); + tree offset_tree = get_representative_tree (sym_byte_offset); + tree num_bytes_tree = get_representative_tree (num_bytes_sval); + tree capacity_tree = get_representative_tree (capacity); + switch (dir) + { + default: + gcc_unreachable (); + break; + case DIR_READ: + ctxt->warn (new symbolic_buffer_overread (base_reg, diag_arg, + offset_tree, + num_bytes_tree, + capacity_tree)); + break; + case DIR_WRITE: + ctxt->warn (new symbolic_buffer_overflow (base_reg, diag_arg, + offset_tree, + num_bytes_tree, + capacity_tree)); + break; + } + } +} + +static tree +maybe_get_integer_cst_tree (const svalue *sval) +{ + tree cst_tree = sval->maybe_get_constant (); + if (cst_tree && TREE_CODE (cst_tree) == INTEGER_CST) + return cst_tree; + + return NULL_TREE; +} + /* May complain when the access on REG is out-of-bounds. */ -void region_model::check_region_bounds (const region *reg, - enum access_direction dir, - region_model_context *ctxt) const +void +region_model::check_region_bounds (const region *reg, + enum access_direction dir, + region_model_context *ctxt) const { gcc_assert (ctxt); - region_offset reg_offset = reg->get_offset (); + /* Get the offset. */ + region_offset reg_offset = reg->get_offset (m_mgr); const region *base_reg = reg_offset.get_base_region (); - /* Bail out on symbolic offsets or symbolic regions. + /* Bail out on symbolic regions. (e.g. 
because the analyzer did not see previous offsets on the latter, it might think that a negative access is before the buffer). */ - if (reg_offset.symbolic_p () || base_reg->symbolic_p ()) + if (base_reg->symbolic_p ()) return; - byte_offset_t offset_unsigned - = reg_offset.get_bit_offset () >> LOG2_BITS_PER_UNIT; + + /* Find out how many bytes were accessed. */ + const svalue *num_bytes_sval = reg->get_byte_size_sval (m_mgr); + tree num_bytes_tree = maybe_get_integer_cst_tree (num_bytes_sval); + /* Bail out if 0 bytes are accessed. */ + if (num_bytes_tree && zerop (num_bytes_tree)) + return; + + /* Get the capacity of the buffer. */ + const svalue *capacity = get_capacity (base_reg); + tree cst_capacity_tree = maybe_get_integer_cst_tree (capacity); + /* The constant offset from a pointer is represented internally as a sizetype but should be interpreted as a signed value here. The statement below - converts the offset to a signed integer with the same precision the - sizetype has on the target system. + converts the offset from bits to bytes and then to a signed integer with + the same precision the sizetype has on the target system. For example, this is needed for out-of-bounds-3.c test1 to pass when compiled with a 64-bit gcc build targeting 32-bit systems. */ - byte_offset_t offset - = offset_unsigned.to_shwi (TYPE_PRECISION (size_type_node)); - - /* Find out how many bytes were accessed. */ - const svalue *num_bytes_sval = reg->get_byte_size_sval (m_mgr); - tree num_bytes_tree = num_bytes_sval->maybe_get_constant (); - if (!num_bytes_tree || TREE_CODE (num_bytes_tree) != INTEGER_CST) - /* If we do not know how many bytes were read/written, - assume that at least one byte was read/written. 
*/ - num_bytes_tree = integer_one_node; + byte_offset_t offset; + if (!reg_offset.symbolic_p ()) + offset = wi::sext (reg_offset.get_bit_offset () >> LOG2_BITS_PER_UNIT, + TYPE_PRECISION (size_type_node)); + + /* If either the offset or the number of bytes accessed are symbolic, + we have to reason about symbolic values. */ + if (reg_offset.symbolic_p () || !num_bytes_tree) + { + const svalue* byte_offset_sval; + if (!reg_offset.symbolic_p ()) + { + tree offset_tree = wide_int_to_tree (integer_type_node, offset); + byte_offset_sval + = m_mgr->get_or_create_constant_svalue (offset_tree); + } + else + byte_offset_sval = reg_offset.get_symbolic_byte_offset (); + check_symbolic_bounds (base_reg, byte_offset_sval, num_bytes_sval, + capacity, dir, ctxt); + return; + } + /* Otherwise continue to check with concrete values. */ byte_range out (0, 0); /* NUM_BYTES_TREE should always be interpreted as unsigned. */ - byte_range read_bytes (offset, wi::to_offset (num_bytes_tree).to_uhwi ()); + byte_offset_t num_bytes_unsigned = wi::to_offset (num_bytes_tree); + byte_range read_bytes (offset, num_bytes_unsigned); /* If read_bytes has a subset < 0, we do have an underflow. */ if (read_bytes.falls_short_of_p (0, &out)) { - tree diag_arg = get_representative_tree (reg->get_base_region ()); + tree diag_arg = get_representative_tree (base_reg); switch (dir) { default: @@ -1648,9 +1889,10 @@ void region_model::check_region_bounds (const region *reg, } } - const svalue *capacity = get_capacity (base_reg); - tree cst_capacity_tree = capacity->maybe_get_constant (); - if (!cst_capacity_tree || TREE_CODE (cst_capacity_tree) != INTEGER_CST) + /* For accesses past the end, we do need a concrete capacity. No need to + do a symbolic check here because the inequality check does not reason + whether constants are greater than symbolic values. 
*/ + if (!cst_capacity_tree) return; byte_range buffer (0, wi::to_offset (cst_capacity_tree)); @@ -1659,7 +1901,7 @@ void region_model::check_region_bounds (const region *reg, { tree byte_bound = wide_int_to_tree (size_type_node, buffer.get_next_byte_offset ()); - tree diag_arg = get_representative_tree (reg->get_base_region ()); + tree diag_arg = get_representative_tree (base_reg); switch (dir) { @@ -1731,6 +1973,110 @@ maybe_get_const_fn_result (const call_details &cd) return sval; } +/* Update this model for an outcome of a call that returns zero. + If UNMERGEABLE, then make the result unmergeable, e.g. to prevent + the state-merger code from merging success and failure outcomes. */ + +void +region_model::update_for_zero_return (const call_details &cd, + bool unmergeable) +{ + if (!cd.get_lhs_type ()) + return; + const svalue *result + = m_mgr->get_or_create_int_cst (cd.get_lhs_type (), 0); + if (unmergeable) + result = m_mgr->get_or_create_unmergeable (result); + set_value (cd.get_lhs_region (), result, cd.get_ctxt ()); +} + +/* Update this model for an outcome of a call that returns non-zero. */ + +void +region_model::update_for_nonzero_return (const call_details &cd) +{ + if (!cd.get_lhs_type ()) + return; + const svalue *zero + = m_mgr->get_or_create_int_cst (cd.get_lhs_type (), 0); + const svalue *result + = get_store_value (cd.get_lhs_region (), cd.get_ctxt ()); + add_constraint (result, NE_EXPR, zero, cd.get_ctxt ()); +} + +/* Subroutine of region_model::maybe_get_copy_bounds. + The Linux kernel commonly uses + min_t([unsigned] long, VAR, sizeof(T)); + to set an upper bound on the size of a copy_to_user. + Attempt to simplify such sizes by trying to get the upper bound as a + constant. + Return the simplified svalue if possible, or NULL otherwise. 
*/ + +static const svalue * +maybe_simplify_upper_bound (const svalue *num_bytes_sval, + region_model_manager *mgr) +{ + tree type = num_bytes_sval->get_type (); + while (const svalue *raw = num_bytes_sval->maybe_undo_cast ()) + num_bytes_sval = raw; + if (const binop_svalue *binop_sval = num_bytes_sval->dyn_cast_binop_svalue ()) + if (binop_sval->get_op () == MIN_EXPR) + if (binop_sval->get_arg1 ()->get_kind () == SK_CONSTANT) + { + return mgr->get_or_create_cast (type, binop_sval->get_arg1 ()); + /* TODO: we might want to also capture the constraint + when recording the diagnostic, or note that we're using + the upper bound. */ + } + return NULL; +} + +/* Attempt to get an upper bound for the size of a copy when simulating a + copy function. + + NUM_BYTES_SVAL is the symbolic value for the size of the copy. + Use it if it's constant, otherwise try to simplify it. Failing + that, use the size of SRC_REG if constant. + + Return a symbolic value for an upper limit on the number of bytes + copied, or NULL if no such value could be determined. */ + +const svalue * +region_model::maybe_get_copy_bounds (const region *src_reg, + const svalue *num_bytes_sval) +{ + if (num_bytes_sval->maybe_get_constant ()) + return num_bytes_sval; + + if (const svalue *simplified + = maybe_simplify_upper_bound (num_bytes_sval, m_mgr)) + num_bytes_sval = simplified; + + if (num_bytes_sval->maybe_get_constant ()) + return num_bytes_sval; + + /* For now, try just guessing the size as the capacity of the + base region of the src. + This is a hack; we might get too large a value. */ + const region *src_base_reg = src_reg->get_base_region (); + num_bytes_sval = get_capacity (src_base_reg); + + if (num_bytes_sval->maybe_get_constant ()) + return num_bytes_sval; + + /* Non-constant: give up. */ + return NULL; +} + +/* Get any known_function for FNDECL, or NULL if there is none. 
*/ + +const known_function * +region_model::get_known_function (tree fndecl) const +{ + known_function_manager *known_fn_mgr = m_mgr->get_known_function_manager (); + return known_fn_mgr->get_by_fndecl (fndecl); +} + /* Update this model for the CALL stmt, using CTXT to report any diagnostics - the first half. @@ -1987,6 +2333,11 @@ region_model::on_call_pre (const gcall *call, region_model_context *ctxt, { /* Handle in "on_call_post". */ } + else if (const known_function *kf = get_known_function (callee_fndecl)) + { + kf->impl_call_pre (cd); + return false; + } else if (!fndecl_has_gimple_body_p (callee_fndecl) && (!(callee_fndecl_flags & (ECF_CONST | ECF_PURE))) && !fndecl_built_in_p (callee_fndecl)) @@ -3907,6 +4258,49 @@ region_model::eval_condition_without_cm (const svalue *lhs, return res; } + /* Handle comparisons between two svalues with more than one operand. */ + if (const binop_svalue *binop = lhs->dyn_cast_binop_svalue ()) + { + switch (op) + { + default: + break; + case EQ_EXPR: + { + /* TODO: binops can be equal even if they are not structurally + equal in case of commutative operators. */ + tristate res = structural_equality (lhs, rhs); + if (res.is_true ()) + return res; + } + break; + case LE_EXPR: + { + tristate res = structural_equality (lhs, rhs); + if (res.is_true ()) + return res; + } + break; + case GE_EXPR: + { + tristate res = structural_equality (lhs, rhs); + if (res.is_true ()) + return res; + res = symbolic_greater_than (binop, rhs); + if (res.is_true ()) + return res; + } + break; + case GT_EXPR: + { + tristate res = symbolic_greater_than (binop, rhs); + if (res.is_true ()) + return res; + } + break; + } + } + return tristate::TS_UNKNOWN; } @@ -3928,6 +4322,123 @@ region_model::compare_initial_and_pointer (const initial_svalue *init, return tristate::TS_UNKNOWN; } +/* Return true if SVAL is definitely positive. 
*/ + +static bool +is_positive_svalue (const svalue *sval) +{ + if (tree cst = sval->maybe_get_constant ()) + return !zerop (cst) && get_range_pos_neg (cst) == 1; + tree type = sval->get_type (); + if (!type) + return false; + /* Consider a binary operation size_t + int. The analyzer wraps the int in + an unaryop_svalue, converting it to a size_t, but in the dynamic execution + the result is smaller than the first operand. Thus, we have to look if + the argument of the unaryop_svalue is also positive. */ + if (const unaryop_svalue *un_op = dyn_cast <const unaryop_svalue *> (sval)) + return CONVERT_EXPR_CODE_P (un_op->get_op ()) && TYPE_UNSIGNED (type) + && is_positive_svalue (un_op->get_arg ()); + return TYPE_UNSIGNED (type); +} + +/* Return true if A is definitely larger than B. + + Limitation: does not account for integer overflows and does not try to + return false, so it can not be used negated. */ + +tristate +region_model::symbolic_greater_than (const binop_svalue *bin_a, + const svalue *b) const +{ + if (bin_a->get_op () == PLUS_EXPR || bin_a->get_op () == MULT_EXPR) + { + /* Eliminate the right-hand side of both svalues. */ + if (const binop_svalue *bin_b = dyn_cast <const binop_svalue *> (b)) + if (bin_a->get_op () == bin_b->get_op () + && eval_condition_without_cm (bin_a->get_arg1 (), + GT_EXPR, + bin_b->get_arg1 ()).is_true () + && eval_condition_without_cm (bin_a->get_arg0 (), + GE_EXPR, + bin_b->get_arg0 ()).is_true ()) + return tristate (tristate::TS_TRUE); + + /* Otherwise, try to remove a positive offset or factor from BIN_A. */ + if (is_positive_svalue (bin_a->get_arg1 ()) + && eval_condition_without_cm (bin_a->get_arg0 (), + GE_EXPR, b).is_true ()) + return tristate (tristate::TS_TRUE); + } + return tristate::unknown (); +} + +/* Return true if A and B are equal structurally. + + Structural equality means that A and B are equal if the svalues A and B have + the same nodes at the same positions in the tree and the leafs are equal. 
+ Equality for conjured_svalues and initial_svalues is determined by comparing + the pointers while constants are compared by value. That behavior is useful + to check for binop_svalues that evaluate to the same concrete value but + might use one operand with a different type but the same constant value. + + For example, + binop_svalue (mult_expr, + initial_svalue (‘size_t’, decl_region (..., 'some_var')), + constant_svalue (‘size_t’, 4)) + and + binop_svalue (mult_expr, + initial_svalue (‘size_t’, decl_region (..., 'some_var')), + constant_svalue (‘sizetype’, 4)) + are structurally equal. A concrete C code example, where this occurs, can + be found in test7 of out-of-bounds-5.c. */ + +tristate +region_model::structural_equality (const svalue *a, const svalue *b) const +{ + /* If A and B are referentially equal, they are also structurally equal. */ + if (a == b) + return tristate (tristate::TS_TRUE); + + switch (a->get_kind ()) + { + default: + return tristate::unknown (); + /* SK_CONJURED and SK_INITIAL are already handled + by the referential equality above. 
*/ + case SK_CONSTANT: + { + tree a_cst = a->maybe_get_constant (); + tree b_cst = b->maybe_get_constant (); + if (a_cst && b_cst) + return tristate (tree_int_cst_equal (a_cst, b_cst)); + } + return tristate (tristate::TS_FALSE); + case SK_UNARYOP: + { + const unaryop_svalue *un_a = as_a <const unaryop_svalue *> (a); + if (const unaryop_svalue *un_b = dyn_cast <const unaryop_svalue *> (b)) + return tristate (pending_diagnostic::same_tree_p (un_a->get_type (), + un_b->get_type ()) + && un_a->get_op () == un_b->get_op () + && structural_equality (un_a->get_arg (), + un_b->get_arg ())); + } + return tristate (tristate::TS_FALSE); + case SK_BINOP: + { + const binop_svalue *bin_a = as_a <const binop_svalue *> (a); + if (const binop_svalue *bin_b = dyn_cast <const binop_svalue *> (b)) + return tristate (bin_a->get_op () == bin_b->get_op () + && structural_equality (bin_a->get_arg0 (), + bin_b->get_arg0 ()) + && structural_equality (bin_a->get_arg1 (), + bin_b->get_arg1 ())); + } + return tristate (tristate::TS_FALSE); + } +} + /* Handle various constraints of the form: LHS: ((bool)INNER_LHS INNER_OP INNER_RHS)) OP : == or != @@ -5278,6 +5789,566 @@ region_model::unset_dynamic_extents (const region *reg) m_dynamic_extents.remove (reg); } +/* Information of the layout of a RECORD_TYPE, capturing it as a vector + of items, where each item is either a field or padding. */ + +class record_layout +{ +public: + /* An item within a record; either a field, or padding after a field. 
*/ + struct item + { + public: + item (const bit_range &br, + tree field, + bool is_padding) + : m_bit_range (br), + m_field (field), + m_is_padding (is_padding) + { + } + + bit_offset_t get_start_bit_offset () const + { + return m_bit_range.get_start_bit_offset (); + } + bit_offset_t get_next_bit_offset () const + { + return m_bit_range.get_next_bit_offset (); + } + + bool contains_p (bit_offset_t offset) const + { + return m_bit_range.contains_p (offset); + } + + void dump_to_pp (pretty_printer *pp) const + { + if (m_is_padding) + pp_printf (pp, "padding after %qD", m_field); + else + pp_printf (pp, "%qD", m_field); + pp_string (pp, ", "); + m_bit_range.dump_to_pp (pp); + } + + bit_range m_bit_range; + tree m_field; + bool m_is_padding; + }; + + record_layout (tree record_type) + { + gcc_assert (TREE_CODE (record_type) == RECORD_TYPE); + + for (tree iter = TYPE_FIELDS (record_type); iter != NULL_TREE; + iter = DECL_CHAIN (iter)) + { + if (TREE_CODE (iter) == FIELD_DECL) + { + int iter_field_offset = int_bit_position (iter); + bit_size_t size_in_bits; + if (!int_size_in_bits (TREE_TYPE (iter), &size_in_bits)) + size_in_bits = 0; + + maybe_pad_to (iter_field_offset); + + /* Add field. */ + m_items.safe_push (item (bit_range (iter_field_offset, + size_in_bits), + iter, false)); + } + } + + /* Add any trailing padding. 
*/ + bit_size_t size_in_bits; + if (int_size_in_bits (record_type, &size_in_bits)) + maybe_pad_to (size_in_bits); + } + + void dump_to_pp (pretty_printer *pp) const + { + unsigned i; + item *it; + FOR_EACH_VEC_ELT (m_items, i, it) + { + it->dump_to_pp (pp); + pp_newline (pp); + } + } + + DEBUG_FUNCTION void dump () const + { + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + pp.buffer->stream = stderr; + dump_to_pp (&pp); + pp_flush (&pp); + } + + const record_layout::item *get_item_at (bit_offset_t offset) const + { + unsigned i; + item *it; + FOR_EACH_VEC_ELT (m_items, i, it) + if (it->contains_p (offset)) + return it; + return NULL; + } + +private: + /* Subroutine of ctor. Add padding item to NEXT_OFFSET if necessary. */ + + void maybe_pad_to (bit_offset_t next_offset) + { + if (m_items.length () > 0) + { + const item &last_item = m_items[m_items.length () - 1]; + bit_offset_t offset_after_last_item + = last_item.get_next_bit_offset (); + if (next_offset > offset_after_last_item) + { + bit_size_t padding_size + = next_offset - offset_after_last_item; + m_items.safe_push (item (bit_range (offset_after_last_item, + padding_size), + last_item.m_field, true)); + } + } + } + + auto_vec<item> m_items; +}; + +/* A subclass of pending_diagnostic for complaining about uninitialized data + being copied across a trust boundary to an untrusted output + (e.g. copy_to_user infoleaks in the Linux kernel). 
*/ + +class exposure_through_uninit_copy + : public pending_diagnostic_subclass<exposure_through_uninit_copy> +{ +public: + exposure_through_uninit_copy (const region *src_region, + const region *dest_region, + const svalue *copied_sval) + : m_src_region (src_region), + m_dest_region (dest_region), + m_copied_sval (copied_sval) + { + gcc_assert (m_copied_sval->get_kind () == SK_POISONED + || m_copied_sval->get_kind () == SK_COMPOUND); + } + + const char *get_kind () const final override + { + return "exposure_through_uninit_copy"; + } + + bool operator== (const exposure_through_uninit_copy &other) const + { + return (m_src_region == other.m_src_region + && m_dest_region == other.m_dest_region + && m_copied_sval == other.m_copied_sval); + } + + int get_controlling_option () const final override + { + return OPT_Wanalyzer_exposure_through_uninit_copy; + } + + bool emit (rich_location *rich_loc) final override + { + diagnostic_metadata m; + /* CWE-200: Exposure of Sensitive Information to an Unauthorized Actor. 
*/ + m.add_cwe (200); + enum memory_space mem_space = get_src_memory_space (); + bool warned; + switch (mem_space) + { + default: + warned = warning_meta + (rich_loc, m, get_controlling_option (), + "potential exposure of sensitive information" + " by copying uninitialized data across trust boundary"); + break; + case MEMSPACE_STACK: + warned = warning_meta + (rich_loc, m, get_controlling_option (), + "potential exposure of sensitive information" + " by copying uninitialized data from stack across trust boundary"); + break; + case MEMSPACE_HEAP: + warned = warning_meta + (rich_loc, m, get_controlling_option (), + "potential exposure of sensitive information" + " by copying uninitialized data from heap across trust boundary"); + break; + } + if (warned) + { + location_t loc = rich_loc->get_loc (); + inform_number_of_uninit_bits (loc); + complain_about_uninit_ranges (loc); + + if (mem_space == MEMSPACE_STACK) + maybe_emit_fixit_hint (); + } + return warned; + } + + label_text describe_final_event (const evdesc::final_event &) final override + { + enum memory_space mem_space = get_src_memory_space (); + switch (mem_space) + { + default: + return label_text::borrow ("uninitialized data copied here"); + + case MEMSPACE_STACK: + return label_text::borrow ("uninitialized data copied from stack here"); + + case MEMSPACE_HEAP: + return label_text::borrow ("uninitialized data copied from heap here"); + } + } + + void mark_interesting_stuff (interesting_t *interest) final override + { + if (m_src_region) + interest->add_region_creation (m_src_region); + } + +private: + enum memory_space get_src_memory_space () const + { + return m_src_region ? 
m_src_region->get_memory_space () : MEMSPACE_UNKNOWN; + } + + bit_size_t calc_num_uninit_bits () const + { + switch (m_copied_sval->get_kind ()) + { + default: + gcc_unreachable (); + break; + case SK_POISONED: + { + const poisoned_svalue *poisoned_sval + = as_a <const poisoned_svalue *> (m_copied_sval); + gcc_assert (poisoned_sval->get_poison_kind () == POISON_KIND_UNINIT); + + /* Give up if we don't have type information. */ + if (m_copied_sval->get_type () == NULL_TREE) + return 0; + + bit_size_t size_in_bits; + if (int_size_in_bits (m_copied_sval->get_type (), &size_in_bits)) + return size_in_bits; + + /* Give up if we can't get the size of the type. */ + return 0; + } + break; + case SK_COMPOUND: + { + const compound_svalue *compound_sval + = as_a <const compound_svalue *> (m_copied_sval); + bit_size_t result = 0; + /* Find keys for uninit svals. */ + for (auto iter : *compound_sval) + { + const svalue *sval = iter.second; + if (const poisoned_svalue *psval + = sval->dyn_cast_poisoned_svalue ()) + if (psval->get_poison_kind () == POISON_KIND_UNINIT) + { + const binding_key *key = iter.first; + const concrete_binding *ckey + = key->dyn_cast_concrete_binding (); + gcc_assert (ckey); + result += ckey->get_size_in_bits (); + } + } + return result; + } + } + } + + void inform_number_of_uninit_bits (location_t loc) const + { + bit_size_t num_uninit_bits = calc_num_uninit_bits (); + if (num_uninit_bits <= 0) + return; + if (num_uninit_bits % BITS_PER_UNIT == 0) + { + /* Express in bytes. */ + byte_size_t num_uninit_bytes = num_uninit_bits / BITS_PER_UNIT; + if (num_uninit_bytes == 1) + inform (loc, "1 byte is uninitialized"); + else + inform (loc, + "%wu bytes are uninitialized", num_uninit_bytes.to_uhwi ()); + } + else + { + /* Express in bits. 
*/ + if (num_uninit_bits == 1) + inform (loc, "1 bit is uninitialized"); + else + inform (loc, + "%wu bits are uninitialized", num_uninit_bits.to_uhwi ()); + } + } + + void complain_about_uninit_ranges (location_t loc) const + { + if (const compound_svalue *compound_sval + = m_copied_sval->dyn_cast_compound_svalue ()) + { + /* Find keys for uninit svals. */ + auto_vec<const concrete_binding *> uninit_keys; + for (auto iter : *compound_sval) + { + const svalue *sval = iter.second; + if (const poisoned_svalue *psval + = sval->dyn_cast_poisoned_svalue ()) + if (psval->get_poison_kind () == POISON_KIND_UNINIT) + { + const binding_key *key = iter.first; + const concrete_binding *ckey + = key->dyn_cast_concrete_binding (); + gcc_assert (ckey); + uninit_keys.safe_push (ckey); + } + } + /* Complain about them in sorted order. */ + uninit_keys.qsort (concrete_binding::cmp_ptr_ptr); + + std::unique_ptr<record_layout> layout; + + tree type = m_copied_sval->get_type (); + if (type && TREE_CODE (type) == RECORD_TYPE) + { + // (std::make_unique is C++14) + layout = std::unique_ptr<record_layout> (new record_layout (type)); + + if (0) + layout->dump (); + } + + unsigned i; + const concrete_binding *ckey; + FOR_EACH_VEC_ELT (uninit_keys, i, ckey) + { + bit_offset_t start_bit = ckey->get_start_bit_offset (); + bit_offset_t next_bit = ckey->get_next_bit_offset (); + complain_about_uninit_range (loc, start_bit, next_bit, + layout.get ()); + } + } + } + + void complain_about_uninit_range (location_t loc, + bit_offset_t start_bit, + bit_offset_t next_bit, + const record_layout *layout) const + { + if (layout) + { + while (start_bit < next_bit) + { + if (const record_layout::item *item + = layout->get_item_at (start_bit)) + { + gcc_assert (start_bit >= item->get_start_bit_offset ()); + gcc_assert (start_bit < item->get_next_bit_offset ()); + if (item->get_start_bit_offset () == start_bit + && item->get_next_bit_offset () <= next_bit) + complain_about_fully_uninit_item (*item); + else + 
complain_about_partially_uninit_item (*item); + start_bit = item->get_next_bit_offset (); + continue; + } + else + break; + } + } + + if (start_bit >= next_bit) + return; + + if (start_bit % 8 == 0 && next_bit % 8 == 0) + { + /* Express in bytes. */ + byte_offset_t start_byte = start_bit / 8; + byte_offset_t last_byte = (next_bit / 8) - 1; + if (last_byte == start_byte) + inform (loc, + "byte %wu is uninitialized", + start_byte.to_uhwi ()); + else + inform (loc, + "bytes %wu - %wu are uninitialized", + start_byte.to_uhwi (), + last_byte.to_uhwi ()); + } + else + { + /* Express in bits. */ + bit_offset_t last_bit = next_bit - 1; + if (last_bit == start_bit) + inform (loc, + "bit %wu is uninitialized", + start_bit.to_uhwi ()); + else + inform (loc, + "bits %wu - %wu are uninitialized", + start_bit.to_uhwi (), + last_bit.to_uhwi ()); + } + } + + static void + complain_about_fully_uninit_item (const record_layout::item &item) + { + tree field = item.m_field; + bit_size_t num_bits = item.m_bit_range.m_size_in_bits; + if (item.m_is_padding) + { + if (num_bits % 8 == 0) + { + /* Express in bytes. */ + byte_size_t num_bytes = num_bits / BITS_PER_UNIT; + if (num_bytes == 1) + inform (DECL_SOURCE_LOCATION (field), + "padding after field %qD is uninitialized (1 byte)", + field); + else + inform (DECL_SOURCE_LOCATION (field), + "padding after field %qD is uninitialized (%wu bytes)", + field, num_bytes.to_uhwi ()); + } + else + { + /* Express in bits. */ + if (num_bits == 1) + inform (DECL_SOURCE_LOCATION (field), + "padding after field %qD is uninitialized (1 bit)", + field); + else + inform (DECL_SOURCE_LOCATION (field), + "padding after field %qD is uninitialized (%wu bits)", + field, num_bits.to_uhwi ()); + } + } + else + { + if (num_bits % 8 == 0) + { + /* Express in bytes. 
*/ + byte_size_t num_bytes = num_bits / BITS_PER_UNIT; + if (num_bytes == 1) + inform (DECL_SOURCE_LOCATION (field), + "field %qD is uninitialized (1 byte)", field); + else + inform (DECL_SOURCE_LOCATION (field), + "field %qD is uninitialized (%wu bytes)", + field, num_bytes.to_uhwi ()); + } + else + { + /* Express in bits. */ + if (num_bits == 1) + inform (DECL_SOURCE_LOCATION (field), + "field %qD is uninitialized (1 bit)", field); + else + inform (DECL_SOURCE_LOCATION (field), + "field %qD is uninitialized (%wu bits)", + field, num_bits.to_uhwi ()); + } + } + } + + static void + complain_about_partially_uninit_item (const record_layout::item &item) + { + tree field = item.m_field; + if (item.m_is_padding) + inform (DECL_SOURCE_LOCATION (field), + "padding after field %qD is partially uninitialized", + field); + else + inform (DECL_SOURCE_LOCATION (field), + "field %qD is partially uninitialized", + field); + /* TODO: ideally we'd describe what parts are uninitialized. */ + } + + void maybe_emit_fixit_hint () const + { + if (tree decl = m_src_region->maybe_get_decl ()) + { + gcc_rich_location hint_richloc (DECL_SOURCE_LOCATION (decl)); + hint_richloc.add_fixit_insert_after (" = {0}"); + inform (&hint_richloc, + "suggest forcing zero-initialization by" + " providing a %<{0}%> initializer"); + } + } + +private: + const region *m_src_region; + const region *m_dest_region; + const svalue *m_copied_sval; +}; + +/* Return true if any part of SVAL is uninitialized. 
*/ + +static bool +contains_uninit_p (const svalue *sval) +{ + struct uninit_finder : public visitor + { + public: + uninit_finder () : m_found_uninit (false) {} + void visit_poisoned_svalue (const poisoned_svalue *sval) + { + if (sval->get_poison_kind () == POISON_KIND_UNINIT) + m_found_uninit = true; + } + bool m_found_uninit; + }; + + uninit_finder v; + sval->accept (&v); + + return v.m_found_uninit; +} + +/* Function for use by plugins when simulating writing data through a + pointer to an "untrusted" region DST_REG (and thus crossing a security + boundary), such as copying data to user space in an OS kernel. + + Check that COPIED_SVAL is fully initialized. If not, complain about + an infoleak to CTXT. + + SRC_REG can be NULL; if non-NULL it is used as a hint in the diagnostic + as to where COPIED_SVAL came from. */ + +void +region_model::maybe_complain_about_infoleak (const region *dst_reg, + const svalue *copied_sval, + const region *src_reg, + region_model_context *ctxt) +{ + /* Check for exposure. */ + if (contains_uninit_p (copied_sval)) + ctxt->warn (new exposure_through_uninit_copy (src_reg, + dst_reg, + copied_sval)); +} + /* class noop_region_model_context : public region_model_context. */ void @@ -5637,7 +6708,7 @@ test_struct () /* Verify get_offset for "c.x". */ { const region *c_x_reg = model.get_lvalue (c_x, NULL); - region_offset offset = c_x_reg->get_offset (); + region_offset offset = c_x_reg->get_offset (&mgr); ASSERT_EQ (offset.get_base_region (), model.get_lvalue (c, NULL)); ASSERT_EQ (offset.get_bit_offset (), 0); } @@ -5645,7 +6716,7 @@ test_struct () /* Verify get_offset for "c.y". */ { const region *c_y_reg = model.get_lvalue (c_y, NULL); - region_offset offset = c_y_reg->get_offset (); + region_offset offset = c_y_reg->get_offset (&mgr); ASSERT_EQ (offset.get_base_region (), model.get_lvalue (c, NULL)); ASSERT_EQ (offset.get_bit_offset (), INT_TYPE_SIZE); } @@ -7140,7 +8211,7 @@ test_var () /* Verify get_offset for "i". 
*/ { - region_offset offset = i_reg->get_offset (); + region_offset offset = i_reg->get_offset (&mgr); ASSERT_EQ (offset.get_base_region (), i_reg); ASSERT_EQ (offset.get_bit_offset (), 0); } @@ -7189,7 +8260,7 @@ test_array_2 () /* Verify get_offset for "arr[0]". */ { const region *arr_0_reg = model.get_lvalue (arr_0, NULL); - region_offset offset = arr_0_reg->get_offset (); + region_offset offset = arr_0_reg->get_offset (&mgr); ASSERT_EQ (offset.get_base_region (), model.get_lvalue (arr, NULL)); ASSERT_EQ (offset.get_bit_offset (), 0); } @@ -7197,11 +8268,19 @@ test_array_2 () /* Verify get_offset for "arr[1]". */ { const region *arr_1_reg = model.get_lvalue (arr_1, NULL); - region_offset offset = arr_1_reg->get_offset (); + region_offset offset = arr_1_reg->get_offset (&mgr); ASSERT_EQ (offset.get_base_region (), model.get_lvalue (arr, NULL)); ASSERT_EQ (offset.get_bit_offset (), INT_TYPE_SIZE); } + /* Verify get_offset for "arr[i]". */ + { + const region *arr_i_reg = model.get_lvalue (arr_i, NULL); + region_offset offset = arr_i_reg->get_offset (&mgr); + ASSERT_EQ (offset.get_base_region (), model.get_lvalue (arr, NULL)); + ASSERT_EQ (offset.get_symbolic_byte_offset ()->get_kind (), SK_BINOP); + } + /* "arr[i] = i;" - this should remove the earlier bindings. */ model.set_value (arr_i, i, NULL); ASSERT_EQ (model.get_rvalue (arr_i, NULL), model.get_rvalue (i, NULL)); diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index a1f2165..e86720a 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. 
If not see #include "analyzer/svalue.h" #include "analyzer/region.h" +#include "analyzer/known-function-manager.h" using namespace ana; @@ -347,6 +348,11 @@ public: store_manager *get_store_manager () { return &m_store_mgr; } bounded_ranges_manager *get_range_manager () const { return m_range_mgr; } + known_function_manager *get_known_function_manager () + { + return &m_known_fn_mgr; + } + /* Dynamically-allocated region instances. The number of these within the analysis can grow arbitrarily. They are still owned by the manager. */ @@ -504,6 +510,8 @@ private: bounded_ranges_manager *m_range_mgr; + known_function_manager m_known_fn_mgr; + /* "Dynamically-allocated" region instances. The number of these within the analysis can grow arbitrarily. They are still owned by the manager. */ @@ -521,8 +529,11 @@ public: call_details (const gcall *call, region_model *model, region_model_context *ctxt); + region_model *get_model () const { return m_model; } region_model_manager *get_manager () const; region_model_context *get_ctxt () const { return m_ctxt; } + logger *get_logger () const; + uncertainty_t *get_uncertainty () const; tree get_lhs_type () const { return m_lhs_type; } const region *get_lhs_region () const { return m_lhs_region; } @@ -645,6 +656,12 @@ class region_model void impl_call_va_arg (const call_details &cd); void impl_call_va_end (const call_details &cd); + const svalue *maybe_get_copy_bounds (const region *src_reg, + const svalue *num_bytes_sval); + void update_for_zero_return (const call_details &cd, + bool unmergeable); + void update_for_nonzero_return (const call_details &cd); + void handle_unrecognized_call (const gcall *call, region_model_context *ctxt); void get_reachable_svalues (svalue_set *out, @@ -717,6 +734,9 @@ class region_model const svalue *rhs) const; tristate compare_initial_and_pointer (const initial_svalue *init, const region_svalue *ptr) const; + tristate symbolic_greater_than (const binop_svalue *a, + const svalue *b) const; + 
tristate structural_equality (const svalue *a, const svalue *b) const; tristate eval_condition (tree lhs, enum tree_code op, tree rhs, @@ -796,11 +816,20 @@ class region_model const svalue *get_string_size (const svalue *sval) const; const svalue *get_string_size (const region *reg) const; + void maybe_complain_about_infoleak (const region *dst_reg, + const svalue *copied_sval, + const region *src_reg, + region_model_context *ctxt); + /* Implemented in sm-malloc.cc */ void on_realloc_with_move (const call_details &cd, const svalue *old_ptr_sval, const svalue *new_ptr_sval); + /* Implemented in sm-taint.cc. */ + void mark_as_tainted (const svalue *sval, + region_model_context *ctxt); + private: const region *get_lvalue_1 (path_var pv, region_model_context *ctxt) const; const svalue *get_rvalue_1 (path_var pv, region_model_context *ctxt) const; @@ -812,6 +841,8 @@ class region_model get_representative_path_var_1 (const region *reg, svalue_set *visited) const; + const known_function *get_known_function (tree fndecl) const; + bool add_constraint (const svalue *lhs, enum tree_code op, const svalue *rhs, @@ -874,6 +905,12 @@ class region_model region_model_context *ctxt) const; void check_region_size (const region *lhs_reg, const svalue *rhs_sval, region_model_context *ctxt) const; + void check_symbolic_bounds (const region *base_reg, + const svalue *sym_byte_offset, + const svalue *num_bytes_sval, + const svalue *capacity, + enum access_direction dir, + region_model_context *ctxt) const; void check_region_bounds (const region *reg, enum access_direction dir, region_model_context *ctxt) const; @@ -1315,6 +1352,10 @@ public: engine (const supergraph *sg = NULL, logger *logger = NULL); const supergraph *get_supergraph () { return m_sg; } region_model_manager *get_model_manager () { return &m_mgr; } + known_function_manager *get_known_function_manager () + { + return m_mgr.get_known_function_manager (); + } void log_stats (logger *logger) const; diff --git 
a/gcc/analyzer/region.cc b/gcc/analyzer/region.cc index 9c8279b..09646bf 100644 --- a/gcc/analyzer/region.cc +++ b/gcc/analyzer/region.cc @@ -290,10 +290,10 @@ region::maybe_get_decl () const first call and caching it internally). */ region_offset -region::get_offset () const +region::get_offset (region_model_manager *mgr) const { if(!m_cached_offset) - m_cached_offset = new region_offset (calc_offset ()); + m_cached_offset = new region_offset (calc_offset (mgr)); return *m_cached_offset; } @@ -491,10 +491,11 @@ region::get_subregions_for_binding (region_model_manager *mgr, or a symbolic offset. */ region_offset -region::calc_offset () const +region::calc_offset (region_model_manager *mgr) const { const region *iter_region = this; bit_offset_t accum_bit_offset = 0; + const svalue *accum_byte_sval = NULL; while (iter_region) { @@ -504,16 +505,36 @@ region::calc_offset () const case RK_ELEMENT: case RK_OFFSET: case RK_BIT_RANGE: - { - bit_offset_t rel_bit_offset; - if (!iter_region->get_relative_concrete_offset (&rel_bit_offset)) - return region_offset::make_symbolic - (iter_region->get_parent_region ()); - accum_bit_offset += rel_bit_offset; - iter_region = iter_region->get_parent_region (); - } + if (accum_byte_sval) + { + const svalue *sval + = iter_region->get_relative_symbolic_offset (mgr); + accum_byte_sval + = mgr->get_or_create_binop (sval->get_type (), PLUS_EXPR, + accum_byte_sval, sval); + iter_region = iter_region->get_parent_region (); + } + else + { + bit_offset_t rel_bit_offset; + if (iter_region->get_relative_concrete_offset (&rel_bit_offset)) + { + accum_bit_offset += rel_bit_offset; + iter_region = iter_region->get_parent_region (); + } + else + { + /* If the iter_region is not concrete anymore, convert the + accumulated bits to a svalue in bytes and revisit the + iter_region collecting the symbolic value. 
*/ + byte_offset_t byte_offset = accum_bit_offset / BITS_PER_UNIT; + tree offset_tree = wide_int_to_tree (integer_type_node, + byte_offset); + accum_byte_sval + = mgr->get_or_create_constant_svalue (offset_tree); + } + } continue; - case RK_SIZED: iter_region = iter_region->get_parent_region (); continue; @@ -527,10 +548,18 @@ region::calc_offset () const continue; default: - return region_offset::make_concrete (iter_region, accum_bit_offset); + return accum_byte_sval + ? region_offset::make_symbolic (iter_region, + accum_byte_sval) + : region_offset::make_concrete (iter_region, + accum_bit_offset); } } - return region_offset::make_concrete (iter_region, accum_bit_offset); + + return accum_byte_sval ? region_offset::make_symbolic (iter_region, + accum_byte_sval) + : region_offset::make_concrete (iter_region, + accum_bit_offset); } /* Base implementation of region::get_relative_concrete_offset vfunc. */ @@ -541,6 +570,14 @@ region::get_relative_concrete_offset (bit_offset_t *) const return false; } +/* Base implementation of region::get_relative_symbolic_offset vfunc. */ + +const svalue * +region::get_relative_symbolic_offset (region_model_manager *mgr) const +{ + return mgr->get_or_create_unknown_svalue (integer_type_node); +} + /* Attempt to get the position and size of this region expressed as a concrete range of bytes relative to its parent. If successful, return true and write to *OUT. @@ -1316,6 +1353,25 @@ field_region::get_relative_concrete_offset (bit_offset_t *out) const return true; } + +/* Implementation of region::get_relative_symbolic_offset vfunc + for field_region. + If known, the returned svalue is equal to the offset converted to bytes and + rounded off. 
*/ + +const svalue * +field_region::get_relative_symbolic_offset (region_model_manager *mgr) const +{ + bit_offset_t out; + if (get_relative_concrete_offset (&out)) + { + tree cst_tree + = wide_int_to_tree (integer_type_node, out / BITS_PER_UNIT); + return mgr->get_or_create_constant_svalue (cst_tree); + } + return mgr->get_or_create_unknown_svalue (integer_type_node); +} + /* class element_region : public region. */ /* Implementation of region::accept vfunc for element_region. */ @@ -1382,6 +1438,29 @@ element_region::get_relative_concrete_offset (bit_offset_t *out) const return false; } +/* Implementation of region::get_relative_symbolic_offset vfunc + for element_region. */ + +const svalue * +element_region::get_relative_symbolic_offset (region_model_manager *mgr) const +{ + tree elem_type = get_type (); + + /* First, use int_size_in_bytes, to reject the case where we + have an incomplete type, or a non-constant value. */ + HOST_WIDE_INT hwi_byte_size = int_size_in_bytes (elem_type); + if (hwi_byte_size > 0) + { + tree byte_size_tree = wide_int_to_tree (integer_type_node, + hwi_byte_size); + const svalue *byte_size_sval + = mgr->get_or_create_constant_svalue (byte_size_tree); + return mgr->get_or_create_binop (integer_type_node, MULT_EXPR, + m_index, byte_size_sval); + } + return mgr->get_or_create_unknown_svalue (integer_type_node); +} + /* class offset_region : public region. */ /* Implementation of region::accept vfunc for offset_region. */ @@ -1438,6 +1517,16 @@ offset_region::get_relative_concrete_offset (bit_offset_t *out) const return false; } +/* Implementation of region::get_relative_symbolic_offset vfunc + for offset_region. */ + +const svalue * +offset_region::get_relative_symbolic_offset (region_model_manager *mgr + ATTRIBUTE_UNUSED) const +{ + return get_byte_offset (); +} + /* Implementation of region::get_byte_size_sval vfunc for offset_region. 
*/ const svalue * @@ -1683,6 +1772,20 @@ bit_range_region::get_relative_concrete_offset (bit_offset_t *out) const return true; } +/* Implementation of region::get_relative_symbolic_offset vfunc for + bit_range_region. + The returned svalue is equal to the offset converted to bytes and + rounded off. */ + +const svalue * +bit_range_region::get_relative_symbolic_offset (region_model_manager *mgr) + const +{ + byte_offset_t start_byte = m_bits.get_start_bit_offset () / BITS_PER_UNIT; + tree start_bit_tree = wide_int_to_tree (integer_type_node, start_byte); + return mgr->get_or_create_constant_svalue (start_bit_tree); +} + /* class var_arg_region : public region. */ void diff --git a/gcc/analyzer/region.h b/gcc/analyzer/region.h index 34ce1fa..6315fac 100644 --- a/gcc/analyzer/region.h +++ b/gcc/analyzer/region.h @@ -175,7 +175,7 @@ public: bool involves_p (const svalue *sval) const; - region_offset get_offset () const; + region_offset get_offset (region_model_manager *mgr) const; /* Attempt to get the size of this region as a concrete number of bytes. If successful, return true and write the size to *OUT. @@ -196,6 +196,11 @@ public: Otherwise return false. */ virtual bool get_relative_concrete_offset (bit_offset_t *out) const; + /* Get the offset in bytes of this region relative to its parent as a svalue. + Might return an unknown_svalue. */ + virtual const svalue * + get_relative_symbolic_offset (region_model_manager *mgr) const; + /* Attempt to get the position and size of this region expressed as a concrete range of bytes relative to its parent. If successful, return true and write to *OUT. 
@@ -226,7 +231,7 @@ public: region (complexity c, unsigned id, const region *parent, tree type); private: - region_offset calc_offset () const; + region_offset calc_offset (region_model_manager *mgr) const; complexity m_complexity; unsigned m_id; // purely for deterministic sorting at this stage, for dumps @@ -751,6 +756,8 @@ public: tree get_field () const { return m_field; } bool get_relative_concrete_offset (bit_offset_t *out) const final override; + const svalue *get_relative_symbolic_offset (region_model_manager *mgr) + const final override; private: tree m_field; @@ -835,6 +842,8 @@ public: virtual bool get_relative_concrete_offset (bit_offset_t *out) const final override; + const svalue *get_relative_symbolic_offset (region_model_manager *mgr) + const final override; private: const svalue *m_index; @@ -919,6 +928,8 @@ public: const svalue *get_byte_offset () const { return m_byte_offset; } bool get_relative_concrete_offset (bit_offset_t *out) const final override; + const svalue *get_relative_symbolic_offset (region_model_manager *mgr) + const final override; const svalue * get_byte_size_sval (region_model_manager *mgr) const final override; @@ -1245,6 +1256,8 @@ public: bool get_bit_size (bit_size_t *out) const final override; const svalue *get_byte_size_sval (region_model_manager *mgr) const final override; bool get_relative_concrete_offset (bit_offset_t *out) const final override; + const svalue *get_relative_symbolic_offset (region_model_manager *mgr) + const final override; private: bit_range m_bits; diff --git a/gcc/analyzer/sm-taint.cc b/gcc/analyzer/sm-taint.cc index 549373b..f5c0cc1 100644 --- a/gcc/analyzer/sm-taint.cc +++ b/gcc/analyzer/sm-taint.cc @@ -1365,6 +1365,33 @@ region_model::check_dynamic_size_for_taint (enum memory_space mem_space, } } +/* Mark SVAL as TAINTED. CTXT must be non-NULL. 
*/ + +void +region_model::mark_as_tainted (const svalue *sval, + region_model_context *ctxt) +{ + gcc_assert (sval); + gcc_assert (ctxt); + + sm_state_map *smap; + const state_machine *sm; + unsigned sm_idx; + if (!ctxt->get_taint_map (&smap, &sm, &sm_idx)) + return; + + gcc_assert (smap); + gcc_assert (sm); + + const taint_state_machine &taint_sm = (const taint_state_machine &)*sm; + + const extrinsic_state *ext_state = ctxt->get_ext_state (); + if (!ext_state) + return; + + smap->set_state (this, sval, taint_sm.m_tainted, NULL, *ext_state); +} + } // namespace ana #endif /* #if ENABLE_ANALYZER */ diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc index 848c5e1..1857d95 100644 --- a/gcc/analyzer/store.cc +++ b/gcc/analyzer/store.cc @@ -123,7 +123,7 @@ uncertainty_t::dump (bool simple) const const binding_key * binding_key::make (store_manager *mgr, const region *r) { - region_offset offset = r->get_offset (); + region_offset offset = r->get_offset (mgr->get_svalue_manager ()); if (offset.symbolic_p ()) return mgr->get_symbolic_binding (r); else @@ -380,7 +380,11 @@ bit_range::as_byte_range (byte_range *out) const void byte_range::dump_to_pp (pretty_printer *pp) const { - if (m_size_in_bytes == 1) + if (m_size_in_bytes == 0) + { + pp_string (pp, "empty"); + } + else if (m_size_in_bytes == 1) { pp_string (pp, "byte "); pp_wide_int (pp, m_start_byte_offset, SIGNED); @@ -455,7 +459,9 @@ bool byte_range::exceeds_p (const byte_range &other, byte_range *out_overhanging_byte_range) const { - if (other.get_last_byte_offset () < get_last_byte_offset ()) + gcc_assert (!empty_p ()); + + if (other.get_next_byte_offset () < get_next_byte_offset ()) { /* THIS definitely exceeds OTHER. */ byte_offset_t start = MAX (get_start_byte_offset (), @@ -477,6 +483,8 @@ bool byte_range::falls_short_of_p (byte_offset_t offset, byte_range *out_fall_short_bytes) const { + gcc_assert (!empty_p ()); + if (get_start_byte_offset () < offset) { /* THIS falls short of OFFSET. 
*/ @@ -897,7 +905,7 @@ binding_map::apply_ctor_val_to_range (const region *parent_reg, = get_subregion_within_ctor (parent_reg, min_index, mgr); const region *max_element = get_subregion_within_ctor (parent_reg, max_index, mgr); - region_offset min_offset = min_element->get_offset (); + region_offset min_offset = min_element->get_offset (mgr); if (min_offset.symbolic_p ()) return false; bit_offset_t start_bit_offset = min_offset.get_bit_offset (); @@ -955,11 +963,11 @@ binding_map::apply_ctor_pair_to_child_region (const region *parent_reg, gcc_assert (sval_byte_size != -1); bit_size_t sval_bit_size = sval_byte_size * BITS_PER_UNIT; /* Get offset of child relative to base region. */ - region_offset child_base_offset = child_reg->get_offset (); + region_offset child_base_offset = child_reg->get_offset (mgr); if (child_base_offset.symbolic_p ()) return false; /* Convert to an offset relative to the parent region. */ - region_offset parent_base_offset = parent_reg->get_offset (); + region_offset parent_base_offset = parent_reg->get_offset (mgr); gcc_assert (!parent_base_offset.symbolic_p ()); bit_offset_t child_parent_offset = (child_base_offset.get_bit_offset () @@ -1365,7 +1373,8 @@ binding_cluster::bind_compound_sval (store_manager *mgr, const region *reg, const compound_svalue *compound_sval) { - region_offset reg_offset = reg->get_offset (); + region_offset reg_offset + = reg->get_offset (mgr->get_svalue_manager ()); if (reg_offset.symbolic_p ()) { m_touched = true; @@ -1614,7 +1623,7 @@ binding_cluster::get_any_binding (store_manager *mgr, /* Alternatively, if this is a symbolic read and the cluster has any bindings, then we don't know if we're reading those values or not, so the result is also "UNKNOWN". 
*/ - if (reg->get_offset ().symbolic_p () + if (reg->get_offset (mgr->get_svalue_manager ()).symbolic_p () && m_map.elements () > 0) { region_model_manager *rmm_mgr = mgr->get_svalue_manager (); @@ -1643,10 +1652,11 @@ const svalue * binding_cluster::maybe_get_compound_binding (store_manager *mgr, const region *reg) const { - region_offset cluster_offset = m_base_region->get_offset (); + region_offset cluster_offset + = m_base_region->get_offset (mgr->get_svalue_manager ()); if (cluster_offset.symbolic_p ()) return NULL; - region_offset reg_offset = reg->get_offset (); + region_offset reg_offset = reg->get_offset (mgr->get_svalue_manager ()); if (reg_offset.symbolic_p ()) return NULL; diff --git a/gcc/analyzer/store.h b/gcc/analyzer/store.h index ac8b685..d172ee7 100644 --- a/gcc/analyzer/store.h +++ b/gcc/analyzer/store.h @@ -237,6 +237,11 @@ struct bit_range void dump_to_pp (pretty_printer *pp) const; void dump () const; + bool empty_p () const + { + return m_size_in_bits == 0; + } + bit_offset_t get_start_bit_offset () const { return m_start_bit_offset; @@ -247,6 +252,7 @@ struct bit_range } bit_offset_t get_last_bit_offset () const { + gcc_assert (!empty_p ()); return get_next_bit_offset () - 1; } @@ -297,6 +303,11 @@ struct byte_range void dump_to_pp (pretty_printer *pp) const; void dump () const; + bool empty_p () const + { + return m_size_in_bytes == 0; + } + bool contains_p (byte_offset_t offset) const { return (offset >= get_start_byte_offset () @@ -329,6 +340,7 @@ struct byte_range } byte_offset_t get_last_byte_offset () const { + gcc_assert (!empty_p ()); return m_start_byte_offset + m_size_in_bytes - 1; } |