diff options
29 files changed, 2479 insertions, 54 deletions
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index c1d0438..f672e6e 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1255,6 +1255,7 @@ ANALYZER_OBJS = \ analyzer/bar-chart.o \ analyzer/call-info.o \ analyzer/call-string.o \ + analyzer/call-summary.o \ analyzer/checker-path.o \ analyzer/complexity.o \ analyzer/constraint-manager.o \ diff --git a/gcc/analyzer/analyzer-logging.h b/gcc/analyzer/analyzer-logging.h index ef14b29..71b540c 100644 --- a/gcc/analyzer/analyzer-logging.h +++ b/gcc/analyzer/analyzer-logging.h @@ -23,6 +23,8 @@ along with GCC; see the file COPYING3. If not see #ifndef ANALYZER_LOGGING_H #define ANALYZER_LOGGING_H +#include "diagnostic-core.h" + namespace ana { /* A logger encapsulates a logging stream: a way to send diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h index b325aee..49c19af 100644 --- a/gcc/analyzer/analyzer.h +++ b/gcc/analyzer/analyzer.h @@ -21,6 +21,8 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_ANALYZER_ANALYZER_H #define GCC_ANALYZER_ANALYZER_H +#include "function.h" + class graphviz_out; namespace ana { @@ -114,6 +116,10 @@ class state_machine; class logger; class visitor; class known_function_manager; +class call_summary; +class call_summary_replay; +struct per_function_data; +struct interesting_t; /* Forward decls of functions. */ @@ -263,6 +269,11 @@ public: /* Hook for making .dot label more readable. */ virtual void print (pretty_printer *pp) const = 0; + /* Hook for updating STATE when handling bifurcation. */ + virtual bool update_state (program_state *state, + const exploded_edge *eedge, + region_model_context *ctxt) const; + /* Hook for updating MODEL within exploded_path::feasible_p and when handling bifurcation. */ virtual bool update_model (region_model *model, diff --git a/gcc/analyzer/call-info.cc b/gcc/analyzer/call-info.cc index efc070b..d9a261f 100644 --- a/gcc/analyzer/call-info.cc +++ b/gcc/analyzer/call-info.cc @@ -66,7 +66,17 @@ along with GCC; see the file COPYING3. If not see namespace ana { -/* class call_info : public custom_eedge_info_t. */ +/* class custom_edge_info. */ + +bool +custom_edge_info::update_state (program_state *state, + const exploded_edge *eedge, + region_model_context *ctxt) const +{ + return update_model (state->m_region_model, eedge, ctxt); +} + +/* class call_info : public custom_edge_info. */ /* Implementation of custom_edge_info::print vfunc for call_info: use get_desc to get a label_text, and print it to PP. */ diff --git a/gcc/analyzer/call-summary.cc b/gcc/analyzer/call-summary.cc new file mode 100644 index 0000000..9d8716d --- /dev/null +++ b/gcc/analyzer/call-summary.cc @@ -0,0 +1,875 @@ +/* Classes for working with summaries of function calls. + Copyright (C) 2022 David Malcolm <dmalcolm@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tree.h" +#include "tree-dfa.h" +#include "diagnostic.h" +#include "tree-diagnostic.h" +#include "selftest.h" +#include "analyzer/analyzer.h" +#include "analyzer/region-model.h" +#include "analyzer/call-summary.h" +#include "analyzer/exploded-graph.h" + +#if ENABLE_ANALYZER + +namespace ana { + +/* class call_summary. */ + +const program_state & +call_summary::get_state () const +{ + return m_enode->get_state (); +} + +tree +call_summary::get_fndecl () const +{ + return m_enode->get_point ().get_fndecl (); +} + +label_text +call_summary::get_desc () const +{ + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + + get_user_facing_desc (&pp); + if (flag_analyzer_verbose_edges) + pp_printf (&pp, " (call summary; EN: %i)", m_enode->m_index); + + return label_text::take (xstrdup (pp_formatted_text (&pp))); +} + +/* Generate a user-facing description of this call summary.c + This has various heuristics for distinguishing between different + summaries. + This will help with debugging, too. */ + +void +call_summary::get_user_facing_desc (pretty_printer *pp) const +{ + tree fndecl = get_fndecl (); + + /* If there are multiple summaries, try to use the return value to + distinguish between them. */ + if (m_per_fn_data->m_summaries.length () > 1) + { + if (tree result = DECL_RESULT (fndecl)) + { + const region *result_reg + = get_state ().m_region_model->get_lvalue (result, NULL); + const svalue *result_sval + = get_state ().m_region_model->get_store_value (result_reg, NULL); + switch (result_sval->get_kind ()) + { + default: + break; + case SK_REGION: + { + const region_svalue *region_sval + = as_a <const region_svalue *> (result_sval); + const region *pointee_reg = region_sval->get_pointee (); + switch (pointee_reg->get_kind ()) + { + default: + break; + case RK_HEAP_ALLOCATED: + pp_printf (pp, + "when %qE returns pointer" + " to heap-allocated buffer", + fndecl); + return; + } + } + break; + case SK_CONSTANT: + { + const constant_svalue *constant_sval + = as_a <const constant_svalue *> (result_sval); + tree cst = constant_sval->get_constant (); + if (POINTER_TYPE_P (TREE_TYPE (result)) + && zerop (cst)) + pp_printf (pp, "when %qE returns NULL", fndecl); + else + pp_printf (pp, "when %qE returns %qE", fndecl, cst); + return; + } + } + } + } + + /* Fallback. */ + pp_printf (pp, "when %qE returns", fndecl); +} + +/* Dump a multiline representation of this object to PP. */ + +void +call_summary::dump_to_pp (const extrinsic_state &ext_state, + pretty_printer *pp, + bool simple) const +{ + label_text desc = get_desc (); + pp_printf (pp, "desc: %qs", desc.get ()); + pp_newline (pp); + + get_state ().dump_to_pp (ext_state, simple, true, pp); +} + +/* Dump a multiline representation of this object to FILE. */ + +void +call_summary::dump (const extrinsic_state &ext_state, + FILE *fp, + bool simple) const +{ + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + pp_show_color (&pp) = pp_show_color (global_dc->printer); + pp.buffer->stream = fp; + dump_to_pp (ext_state, &pp, simple); + pp_flush (&pp); +} + +/* Dump a multiline representation of this object to stderr. */ + +DEBUG_FUNCTION void +call_summary::dump (const extrinsic_state &ext_state, bool simple) const +{ + dump (ext_state, stderr, simple); +} + +/* class call_summary_replay. */ + +/* call_summary_replay's ctor. + Populate the cache with params for the summary based on + arguments at the caller. */ + +call_summary_replay::call_summary_replay (const call_details &cd, + function *called_fn, + call_summary *summary, + const extrinsic_state &ext_state) +: m_cd (cd), + m_called_fn (called_fn), + m_summary (summary), + m_ext_state (ext_state) +{ + region_model_manager *mgr = cd.get_manager (); + + // populate params based on args + tree fndecl = called_fn->decl; + + /* Get a frame_region for use with respect to the summary. + This will be a top-level frame, since that's what's in + the summary. */ + const frame_region *summary_frame + = mgr->get_frame_region (NULL, called_fn); + + unsigned idx = 0; + for (tree iter_parm = DECL_ARGUMENTS (fndecl); iter_parm; + iter_parm = DECL_CHAIN (iter_parm), ++idx) + { + /* If there's a mismatching declaration, the call stmt might + not have enough args. Handle this case by leaving the + rest of the params as uninitialized. */ + if (idx >= cd.num_args ()) + break; + const svalue *caller_arg_sval = cd.get_arg_svalue (idx); + tree parm_lval = iter_parm; + if (tree parm_default_ssa = ssa_default_def (called_fn, iter_parm)) + parm_lval = parm_default_ssa; + const region *summary_parm_reg + = summary_frame->get_region_for_local (mgr, parm_lval, cd.get_ctxt ()); + const svalue *summary_initial_parm_reg + = mgr->get_or_create_initial_value (summary_parm_reg); + add_svalue_mapping (summary_initial_parm_reg, caller_arg_sval); + } + + /* Handle any variadic args. */ + unsigned va_arg_idx = 0; + for (; idx < cd.num_args (); idx++, va_arg_idx++) + { + const svalue *caller_arg_sval = cd.get_arg_svalue (idx); + const region *summary_var_arg_reg + = mgr->get_var_arg_region (summary_frame, va_arg_idx); + const svalue *summary_initial_var_arg_reg + = mgr->get_or_create_initial_value (summary_var_arg_reg); + add_svalue_mapping (summary_initial_var_arg_reg, caller_arg_sval); + } +} + +/* Try to convert SUMMARY_SVAL in the summary to a corresponding svalue + in the caller, caching the result. + + Return NULL if the conversion is not possible. */ + +const svalue * +call_summary_replay::convert_svalue_from_summary (const svalue *summary_sval) +{ + gcc_assert (summary_sval); + + if (const svalue **slot + = m_map_svalue_from_summary_to_caller.get (summary_sval)) + return *slot; + + const svalue *caller_sval = convert_svalue_from_summary_1 (summary_sval); + + /* Add to cache. */ + add_svalue_mapping (summary_sval, caller_sval); + + return caller_sval; +} + +/* Implementation of call_summary_replay::convert_svalue_from_summary. */ + +const svalue * +call_summary_replay::convert_svalue_from_summary_1 (const svalue *summary_sval) +{ + gcc_assert (summary_sval); + + switch (summary_sval->get_kind ()) + { + default: + gcc_unreachable (); + case SK_REGION: + { + const region_svalue *region_summary_sval + = as_a <const region_svalue *> (summary_sval); + const region *summary_reg = region_summary_sval->get_pointee (); + const region *caller_reg = convert_region_from_summary (summary_reg); + if (!caller_reg) + return NULL; + region_model_manager *mgr = get_manager (); + const svalue *caller_ptr + = mgr->get_ptr_svalue (summary_sval->get_type (), + caller_reg); + return caller_ptr; + } + break; + + case SK_CONSTANT: + case SK_PLACEHOLDER: + case SK_POISONED: + case SK_UNKNOWN: + return summary_sval; + + case SK_SETJMP: + return NULL; // TODO + + case SK_INITIAL: + { + const initial_svalue *initial_summary_sval + = as_a <const initial_svalue *> (summary_sval); + /* Params should already be in the cache, courtesy of the ctor. */ + gcc_assert (!initial_summary_sval->initial_value_of_param_p ()); + + /* Initial value of region within the summary is the value of the + region at the point of the call. */ + const region *summary_reg = initial_summary_sval->get_region (); + const region *caller_reg = convert_region_from_summary (summary_reg); + if (!caller_reg) + return NULL; + const svalue *caller_sval + = m_cd.get_model ()->get_store_value (caller_reg, m_cd.get_ctxt ()); + return caller_sval; + } + break; + case SK_UNARYOP: + { + const unaryop_svalue *unaryop_summary_sval + = as_a <const unaryop_svalue *> (summary_sval); + region_model_manager *mgr = get_manager (); + return mgr->get_or_create_unaryop + (summary_sval->get_type (), + unaryop_summary_sval->get_op (), + convert_svalue_from_summary (unaryop_summary_sval->get_arg ())); + } + break; + case SK_BINOP: + { + const binop_svalue *binop_summary_sval + = as_a <const binop_svalue *> (summary_sval); + region_model_manager *mgr = get_manager (); + return mgr->get_or_create_binop + (summary_sval->get_type (), + binop_summary_sval->get_op (), + convert_svalue_from_summary (binop_summary_sval->get_arg0 ()), + convert_svalue_from_summary (binop_summary_sval->get_arg1 ())); + } + break; + case SK_SUB: + { + const sub_svalue *sub_summary_sval + = as_a <const sub_svalue *> (summary_sval); + region_model_manager *mgr = get_manager (); + const svalue *summary_parent_sval = sub_summary_sval->get_parent (); + if (!summary_parent_sval) + return NULL; + const region *summary_subregion = sub_summary_sval->get_subregion (); + if (!summary_subregion) + return NULL; + return mgr->get_or_create_sub_svalue (summary_sval->get_type (), + summary_parent_sval, + summary_subregion); + } + break; + case SK_REPEATED: + { + const repeated_svalue *repeated_summary_sval + = as_a <const repeated_svalue *> (summary_sval); + const svalue *summary_outer_size + = repeated_summary_sval->get_outer_size (); + const svalue *caller_outer_size + = convert_svalue_from_summary (summary_outer_size); + if (!caller_outer_size) + return NULL; + const svalue *summary_inner_sval + = repeated_summary_sval->get_inner_svalue (); + const svalue *caller_inner_sval + = convert_svalue_from_summary (summary_inner_sval); + if (!caller_inner_sval) + return NULL; + region_model_manager *mgr = get_manager (); + return mgr->get_or_create_repeated_svalue (summary_sval->get_type (), + caller_outer_size, + caller_inner_sval); + } + break; + case SK_BITS_WITHIN: + { + const bits_within_svalue *bits_within_summary_sval + = as_a <const bits_within_svalue *> (summary_sval); + const bit_range &bits = bits_within_summary_sval->get_bits (); + const svalue *summary_inner_sval + = bits_within_summary_sval->get_inner_svalue (); + const svalue *caller_inner_sval + = convert_svalue_from_summary (summary_inner_sval); + if (!caller_inner_sval) + return NULL; + region_model_manager *mgr = get_manager (); + return mgr->get_or_create_bits_within (summary_sval->get_type (), + bits, + caller_inner_sval); + } + break; + case SK_UNMERGEABLE: + { + const unmergeable_svalue *unmergeable_summary_sval + = as_a <const unmergeable_svalue *> (summary_sval); + const svalue *summary_arg_sval = unmergeable_summary_sval->get_arg (); + const svalue *caller_arg_sval + = convert_svalue_from_summary (summary_arg_sval); + if (!caller_arg_sval) + return NULL; + region_model_manager *mgr = get_manager (); + return mgr->get_or_create_unmergeable (caller_arg_sval); + } + break; + case SK_WIDENING: + { + const widening_svalue *widening_summary_sval + = as_a <const widening_svalue *> (summary_sval); + const function_point &point = widening_summary_sval->get_point (); + const svalue *summary_base_sval + = widening_summary_sval->get_base_svalue (); + const svalue *caller_base_sval + = convert_svalue_from_summary (summary_base_sval); + if (!(caller_base_sval + && caller_base_sval->can_have_associated_state_p ())) + return NULL; + const svalue *summary_iter_sval + = widening_summary_sval->get_iter_svalue (); + const svalue *caller_iter_sval + = convert_svalue_from_summary (summary_iter_sval); + if (!(caller_iter_sval + && caller_iter_sval->can_have_associated_state_p ())) + return NULL; + region_model_manager *mgr = get_manager (); + return mgr->get_or_create_widening_svalue + (summary_iter_sval->get_type (), + point, + caller_base_sval, + caller_iter_sval); + } + break; + case SK_COMPOUND: + { + const compound_svalue *compound_summary_sval + = as_a <const compound_svalue *> (summary_sval); + region_model_manager *mgr = get_manager (); + store_manager *store_mgr = mgr->get_store_manager (); + binding_map caller_map; + auto_vec <const binding_key *> summary_keys; + for (auto kv : *compound_summary_sval) + summary_keys.safe_push (kv.first); + summary_keys.qsort (binding_key::cmp_ptrs); + for (auto key : summary_keys) + { + gcc_assert (key->concrete_p ()); + /* No remapping is needed for concrete binding keys. */ + + const svalue *bound_summary_sval + = compound_summary_sval->get_map ().get (key); + const svalue *caller_sval + = convert_svalue_from_summary (bound_summary_sval); + if (!caller_sval) + caller_sval = mgr->get_or_create_unknown_svalue (NULL_TREE); + + if (const compound_svalue *inner_compound_sval + = caller_sval->dyn_cast_compound_svalue ()) + { + const concrete_binding *outer_key + = as_a <const concrete_binding *> (key); + for (auto inner_kv : *inner_compound_sval) + { + // These should already be mapped to the caller. + const binding_key *inner_key = inner_kv.first; + const svalue *inner_sval = inner_kv.second; + gcc_assert (inner_key->concrete_p ()); + const concrete_binding *concrete_key + = as_a <const concrete_binding *> (inner_key); + bit_offset_t effective_start + = (concrete_key->get_start_bit_offset () + + outer_key->get_start_bit_offset ()); + const concrete_binding *effective_concrete_key + = store_mgr->get_concrete_binding + (effective_start, + concrete_key->get_size_in_bits ()); + caller_map.put (effective_concrete_key, inner_sval); + } + } + else + caller_map.put (key, caller_sval); + } + return mgr->get_or_create_compound_svalue (summary_sval->get_type (), + caller_map); + } + break; + case SK_CONJURED: + { + region_model_manager *mgr = get_manager (); + return mgr->get_or_create_unknown_svalue (summary_sval->get_type ()); + } + break; + case SK_ASM_OUTPUT: + { + const asm_output_svalue *asm_output_summary_sval + = as_a <const asm_output_svalue *> (summary_sval); + const char *asm_string = asm_output_summary_sval->get_asm_string (); + unsigned output_idx = asm_output_summary_sval->get_output_idx (); + unsigned num_inputs = asm_output_summary_sval->get_num_inputs (); + unsigned num_outputs = asm_output_summary_sval->get_num_outputs (); + auto_vec<const svalue *> inputs (num_inputs); + for (unsigned idx = 0; idx < num_inputs; idx++) + { + const svalue *summary_input + = asm_output_summary_sval->get_input (idx); + const svalue *caller_input + = convert_svalue_from_summary (summary_input); + if (!caller_input) + return NULL; + inputs.safe_push (caller_input); + } + region_model_manager *mgr = get_manager (); + return mgr->get_or_create_asm_output_svalue (summary_sval->get_type (), + asm_string, + output_idx, + num_outputs, + inputs); + } + break; + case SK_CONST_FN_RESULT: + { + const const_fn_result_svalue *const_fn_result_summary_sval + = as_a <const const_fn_result_svalue *> (summary_sval); + tree fndecl = const_fn_result_summary_sval->get_fndecl (); + unsigned num_inputs = const_fn_result_summary_sval->get_num_inputs (); + auto_vec<const svalue *> inputs (num_inputs); + for (unsigned idx = 0; idx < num_inputs; idx++) + { + const svalue *summary_input + = const_fn_result_summary_sval->get_input (idx); + const svalue *caller_input + = convert_svalue_from_summary (summary_input); + if (!caller_input) + return NULL; + inputs.safe_push (caller_input); + } + region_model_manager *mgr = get_manager (); + return mgr->get_or_create_const_fn_result_svalue + (summary_sval->get_type (), + fndecl, + inputs); + } + break; + } +} + +/* Try to convert SUMMARY_REG in the summary to a corresponding region + in the caller, caching the result. + + Return NULL if the conversion is not possible. */ + +const region * +call_summary_replay::convert_region_from_summary (const region *summary_reg) +{ + gcc_assert (summary_reg); + + if (const region **slot + = m_map_region_from_summary_to_caller.get (summary_reg)) + return *slot; + + const region *caller_reg = convert_region_from_summary_1 (summary_reg); + + /* Add to cache. */ + add_region_mapping (summary_reg, caller_reg); + + return caller_reg; +} + +/* Implementation of call_summary_replay::convert_region_from_summary. */ + +const region * +call_summary_replay::convert_region_from_summary_1 (const region *summary_reg) +{ + gcc_assert (summary_reg); + + region_model_manager *mgr = get_manager (); + switch (summary_reg->get_kind ()) + { + default: + gcc_unreachable (); + /* Top-level regions. */ + case RK_FRAME: + case RK_GLOBALS: + case RK_CODE: + case RK_STACK: + case RK_HEAP: + case RK_ROOT: + /* These should never be pointed to by a region_svalue. */ + gcc_unreachable (); + + case RK_FUNCTION: + case RK_LABEL: + case RK_STRING: + case RK_UNKNOWN: + /* We can reuse these regions directly. */ + return summary_reg; + + case RK_SYMBOLIC: + { + const symbolic_region *summary_symbolic_reg + = as_a <const symbolic_region *> (summary_reg); + const svalue *summary_ptr_sval = summary_symbolic_reg->get_pointer (); + const svalue *caller_ptr_sval + = convert_svalue_from_summary (summary_ptr_sval); + if (!caller_ptr_sval) + return NULL; + const region *caller_reg + = get_caller_model ()->deref_rvalue (caller_ptr_sval, + NULL_TREE, + get_ctxt ()); + return caller_reg; + } + break; + + case RK_DECL: + { + const decl_region *summary_decl_reg + = as_a <const decl_region *> (summary_reg); + tree decl = summary_decl_reg->get_decl (); + switch (TREE_CODE (decl)) + { + default: + gcc_unreachable (); + case SSA_NAME: + /* We don't care about writes to locals within + the summary. */ + return NULL; + case VAR_DECL: + /* We don't care about writes to locals within + the summary. */ + if (is_global_var (decl)) + /* If it's a global, we can reuse the region directly. */ + return summary_reg; + else + /* Otherwise, we don't care about locals. */ + return NULL; + case RESULT_DECL: + return m_cd.get_lhs_region (); + case PARM_DECL: + /* Writes (by value) to parms should be visible to the caller. */ + return NULL; + } + } + break; + case RK_FIELD: + { + const field_region *summary_field_reg + = as_a <const field_region *> (summary_reg); + const region *summary_parent_reg = summary_reg->get_parent_region (); + const region *caller_parent_reg + = convert_region_from_summary (summary_parent_reg); + if (!caller_parent_reg) + return NULL; + tree field = summary_field_reg->get_field (); + return mgr->get_field_region (caller_parent_reg, field); + } + break; + case RK_ELEMENT: + { + const element_region *summary_element_reg + = as_a <const element_region *> (summary_reg); + const region *summary_parent_reg = summary_reg->get_parent_region (); + const region *caller_parent_reg + = convert_region_from_summary (summary_parent_reg); + if (!caller_parent_reg) + return NULL; + const svalue *summary_index = summary_element_reg->get_index (); + const svalue *caller_index + = convert_svalue_from_summary (summary_index); + if (!caller_index) + return NULL; + return mgr->get_element_region (caller_parent_reg, + summary_reg->get_type (), + caller_index); + } + break; + case RK_OFFSET: + { + const offset_region *summary_offset_reg + = as_a <const offset_region *> (summary_reg); + const region *summary_parent_reg = summary_reg->get_parent_region (); + const region *caller_parent_reg + = convert_region_from_summary (summary_parent_reg); + if (!caller_parent_reg) + return NULL; + const svalue *summary_byte_offset + = summary_offset_reg->get_byte_offset (); + const svalue *caller_byte_offset + = convert_svalue_from_summary (summary_byte_offset); + if (!caller_byte_offset) + return NULL; + return mgr->get_offset_region (caller_parent_reg, + summary_reg->get_type (), + caller_byte_offset); + } + break; + case RK_SIZED: + { + const sized_region *summary_sized_reg + = as_a <const sized_region *> (summary_reg); + const region *summary_parent_reg = summary_reg->get_parent_region (); + const region *caller_parent_reg + = convert_region_from_summary (summary_parent_reg); + if (!caller_parent_reg) + return NULL; + const svalue *summary_byte_size + = summary_sized_reg->get_byte_size_sval (mgr); + const svalue *caller_byte_size + = convert_svalue_from_summary (summary_byte_size); + if (!caller_byte_size) + return NULL; + return mgr->get_sized_region (caller_parent_reg, + summary_reg->get_type (), + caller_byte_size); + } + break; + case RK_CAST: + { + const cast_region *summary_cast_reg + = as_a <const cast_region *> (summary_reg); + const region *summary_original_reg + = summary_cast_reg->get_original_region (); + const region *caller_original_reg + = convert_region_from_summary (summary_original_reg); + if (!caller_original_reg) + return NULL; + return mgr->get_cast_region (caller_original_reg, + summary_reg->get_type ()); + } + break; + case RK_HEAP_ALLOCATED: + { + /* If we have a heap-allocated region in the summary, then + it was allocated within the callee. + Create a new heap-allocated region to summarize this. */ + return mgr->create_region_for_heap_alloc (); + } + break; + case RK_ALLOCA: + return NULL; + case RK_BIT_RANGE: + { + const bit_range_region *summary_bit_range_reg + = as_a <const bit_range_region *> (summary_reg); + const region *summary_parent_reg = summary_reg->get_parent_region (); + const region *caller_parent_reg + = convert_region_from_summary (summary_parent_reg); + if (!caller_parent_reg) + return NULL; + const bit_range &bits = summary_bit_range_reg->get_bits (); + return mgr->get_bit_range (caller_parent_reg, + summary_reg->get_type (), + bits); + } + break; + case RK_VAR_ARG: + return NULL; + } +} + +/* Try to convert SUMMARY_KEY in the summary to a corresponding binding key + in the caller. + + Return NULL if the conversion is not possible. */ + +const binding_key * +call_summary_replay::convert_key_from_summary (const binding_key *summary_key) +{ + if (summary_key->concrete_p ()) + return summary_key; + + const symbolic_binding *symbolic_key = (const symbolic_binding *)summary_key; + const region *summary_reg = symbolic_key->get_region (); + const region *caller_reg = convert_region_from_summary (summary_reg); + if (!caller_reg) + return NULL; + region_model_manager *mgr = get_manager (); + store_manager *store_mgr = mgr->get_store_manager (); + return store_mgr->get_symbolic_binding (caller_reg); +} + +/* Record that SUMMARY_SVAL maps to CALLER_SVAL for this replay. */ + +void +call_summary_replay::add_svalue_mapping (const svalue *summary_sval, + const svalue *caller_sval) +{ + gcc_assert (summary_sval); + // CALLER_SVAL can be NULL + m_map_svalue_from_summary_to_caller.put (summary_sval, caller_sval); +} + +/* Record that SUMMARY_REG maps to CALLER_REG for this replay. */ + +void +call_summary_replay::add_region_mapping (const region *summary_reg, + const region *caller_reg) +{ + gcc_assert (summary_reg); + // CALLER_REG can be NULL + m_map_region_from_summary_to_caller.put (summary_reg, caller_reg); +} + +/* Dump a multiline representation of this object to PP. */ + +void +call_summary_replay::dump_to_pp (pretty_printer *pp, bool simple) const +{ + pp_newline (pp); + pp_string (pp, "CALL DETAILS:"); + pp_newline (pp); + m_cd.dump_to_pp (pp, simple); + + pp_newline (pp); + pp_string (pp, "CALLEE SUMMARY:"); + pp_newline (pp); + m_summary->dump_to_pp (m_ext_state, pp, simple); + + /* Current state of caller (could be in mid-update). */ + pp_newline (pp); + pp_string (pp, "CALLER:"); + pp_newline (pp); + m_cd.get_model ()->dump_to_pp (pp, simple, true); + + pp_newline (pp); + pp_string (pp, "REPLAY STATE:"); + pp_newline (pp); + pp_string (pp, "svalue mappings from summary to caller:"); + pp_newline (pp); + auto_vec <const svalue *> summary_svals; + for (auto kv : m_map_svalue_from_summary_to_caller) + summary_svals.safe_push (kv.first); + summary_svals.qsort (svalue::cmp_ptr_ptr); + for (auto summary_sval : summary_svals) + { + pp_string (pp, "sval in summary: "); + summary_sval->dump_to_pp (pp, simple); + pp_newline (pp); + + const svalue *caller_sval + = *((const_cast<svalue_map_t &> + (m_map_svalue_from_summary_to_caller)).get (summary_sval)); + pp_string (pp, " sval in caller: "); + caller_sval->dump_to_pp (pp, simple); + pp_newline (pp); + } + + pp_newline (pp); + pp_string (pp, "region mappings from summary to caller:"); + pp_newline (pp); + auto_vec <const region *> summary_regs; + for (auto kv : m_map_region_from_summary_to_caller) + summary_regs.safe_push (kv.first); + summary_regs.qsort (region::cmp_ptr_ptr); + for (auto summary_reg : summary_regs) + { + pp_string (pp, "reg in summary: "); + summary_reg->dump_to_pp (pp, simple); + pp_newline (pp); + + const region *caller_reg + = *((const_cast<region_map_t &> + (m_map_region_from_summary_to_caller)).get (summary_reg)); + pp_string (pp, " reg in caller: "); + caller_reg->dump_to_pp (pp, simple); + pp_newline (pp); + } +} + +/* Dump a multiline representation of this object to FILE. */ + +void +call_summary_replay::dump (FILE *fp, bool simple) const +{ + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + pp_show_color (&pp) = pp_show_color (global_dc->printer); + pp.buffer->stream = fp; + dump_to_pp (&pp, simple); + pp_flush (&pp); +} + +/* Dump a multiline representation of this object to stderr. */ + +DEBUG_FUNCTION void +call_summary_replay::dump (bool simple) const +{ + dump (stderr, simple); +} + +} // namespace ana + +#endif /* #if ENABLE_ANALYZER */ diff --git a/gcc/analyzer/call-summary.h b/gcc/analyzer/call-summary.h new file mode 100644 index 0000000..f4c5ff0 --- /dev/null +++ b/gcc/analyzer/call-summary.h @@ -0,0 +1,118 @@ +/* Classes for working with summaries of function calls. + Copyright (C) 2022 David Malcolm <dmalcolm@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef GCC_ANALYZER_CALL_SUMMARY_H +#define GCC_ANALYZER_CALL_SUMMARY_H + +namespace ana { + +/* A class summarizing one particular outcome of a function that + we've already analyzed. + This lets us efficiently replay the analysis when we see calls + to the function, providing an approximation of the behavior of + the function without having to execute within the function itself. */ + +class call_summary +{ +public: + call_summary (per_function_data *per_fn_data, + const exploded_node *enode) + : m_per_fn_data (per_fn_data), + m_enode (enode) + {} + const program_state &get_state () const; + tree get_fndecl () const; + + label_text get_desc () const; + + void dump_to_pp (const extrinsic_state &ext_state, + pretty_printer *pp, + bool simple) const; + void dump (const extrinsic_state &ext_state, FILE *fp, bool simple) const; + void dump (const extrinsic_state &ext_state, bool simple) const; + +private: + void get_user_facing_desc (pretty_printer *pp) const; + + per_function_data *const m_per_fn_data; + const exploded_node *const m_enode; +}; + +/* A class for handling replaying a specific call summary at + a specific call site. + + Supports remapping svalues and regions, e.g. remapping + INIT_VAL(param of callee) + to: + whatever that argument is at the call site. */ + +class call_summary_replay +{ +public: + call_summary_replay (const call_details &cd, + function *called_fn, + call_summary *m_summary, + const extrinsic_state &ext_state); + + const call_details &get_call_details () const { return m_cd; } + const gcall *get_call_stmt () const { return m_cd.get_call_stmt (); } + region_model_manager *get_manager () const { return m_cd.get_manager (); } + store_manager *get_store_manager () const + { + return get_manager ()->get_store_manager (); + } + region_model_context *get_ctxt () const { return m_cd.get_ctxt (); } + region_model *get_caller_model () const { return m_cd.get_model (); } + + const svalue *convert_svalue_from_summary (const svalue *); + const region *convert_region_from_summary (const region *); + const binding_key *convert_key_from_summary (const binding_key *); + + void add_svalue_mapping (const svalue *summary_sval, + const svalue *caller_sval); + void add_region_mapping (const region *summary_sval, + const region *caller_sval); + + void dump_to_pp (pretty_printer *pp, bool simple) const; + void dump (FILE *fp, bool simple) const; + void dump (bool simple) const; + +private: + DISABLE_COPY_AND_ASSIGN (call_summary_replay); + + const svalue *convert_svalue_from_summary_1 (const svalue *); + const region *convert_region_from_summary_1 (const region *); + + const call_details &m_cd; + function *m_called_fn; + call_summary *m_summary; + const extrinsic_state &m_ext_state; + + // Mapping from svalues in summary to svalues for callsite: + typedef hash_map <const svalue *, const svalue *> svalue_map_t; + svalue_map_t m_map_svalue_from_summary_to_caller; + + // Mapping from regions in summary to regions for callsite: + typedef hash_map <const region *, const region *> region_map_t; + region_map_t m_map_region_from_summary_to_caller; +}; + +} // namespace ana + +#endif /* GCC_ANALYZER_CALL_SUMMARY_H */ diff --git a/gcc/analyzer/constraint-manager.cc b/gcc/analyzer/constraint-manager.cc index 4133a13..6685e2e 100644 --- a/gcc/analyzer/constraint-manager.cc +++ b/gcc/analyzer/constraint-manager.cc @@ -48,6 +48,7 @@ along with GCC; see the file COPYING3. If not see #include "analyzer/store.h" #include "analyzer/region-model.h" #include "analyzer/constraint-manager.h" +#include "analyzer/call-summary.h" #include "analyzer/analyzer-selftests.h" #include "tree-pretty-print.h" @@ -3043,6 +3044,60 @@ constraint_manager::for_each_fact (fact_visitor *visitor) const } } +/* Subclass of fact_visitor for use by + constraint_manager::replay_call_summary. */ + +class replay_fact_visitor : public fact_visitor +{ +public: + replay_fact_visitor (call_summary_replay &r, + constraint_manager *out) + : m_r (r), m_out (out), m_feasible (true) + {} + + bool feasible_p () const { return m_feasible; } + + void on_fact (const svalue *lhs, enum tree_code code, const svalue *rhs) + final override + { + const svalue *caller_lhs = m_r.convert_svalue_from_summary (lhs); + if (!caller_lhs) + return; + const svalue *caller_rhs = m_r.convert_svalue_from_summary (rhs); + if (!caller_rhs) + return; + if (!m_out->add_constraint (caller_lhs, code, caller_rhs)) + m_feasible = false; + } + + void on_ranges (const svalue *lhs_sval, + const bounded_ranges *ranges) final override + { + const svalue *caller_lhs = m_r.convert_svalue_from_summary (lhs_sval); + if (!caller_lhs) + return; + if (!m_out->add_bounded_ranges (caller_lhs, ranges)) + m_feasible = false; + } + +private: + call_summary_replay &m_r; + constraint_manager *m_out; + bool m_feasible; +}; + +/* Attempt to use R to replay the constraints from SUMMARY into this object. + Return true if it is feasible. */ + +bool +constraint_manager::replay_call_summary (call_summary_replay &r, + const constraint_manager &summary) +{ + replay_fact_visitor v (r, this); + summary.for_each_fact (&v); + return v.feasible_p (); +} + /* Assert that this object is valid. */ void diff --git a/gcc/analyzer/constraint-manager.h b/gcc/analyzer/constraint-manager.h index 1271f18..daacaa3 100644 --- a/gcc/analyzer/constraint-manager.h +++ b/gcc/analyzer/constraint-manager.h @@ -487,6 +487,9 @@ public: bounded_ranges_manager *get_range_manager () const; + bool replay_call_summary (call_summary_replay &r, + const constraint_manager &summary); + auto_delete_vec<equiv_class> m_equiv_classes; auto_vec<constraint> m_constraints; auto_vec<bounded_ranges_constraint> m_bounded_ranges_constraints; diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index 742ac02..b4deee5 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -72,6 +72,7 @@ along with GCC; see the file COPYING3. If not see #include "attribs.h" #include "tree-dfa.h" #include "analyzer/known-function-manager.h" +#include "analyzer/call-summary.h" /* For an overview, see gcc/doc/analyzer.texi. */ @@ -1425,6 +1426,26 @@ exploded_node::on_stmt (exploded_graph &eg, &old_state, state, uncertainty, path_ctxt, stmt); + /* Handle call summaries here. */ + if (cgraph_edge *cgedge + = supergraph_call_edge (snode->get_function (), stmt)) + if (eg.get_analysis_plan ().use_summary_p (cgedge)) + { + function *called_fn = get_ultimate_function_for_cgraph_edge (cgedge); + per_function_data *called_fn_data + = eg.get_per_function_data (called_fn); + if (called_fn_data) + return replay_call_summaries (eg, + snode, + as_a <const gcall *> (stmt), + state, + uncertainty, + path_ctxt, + called_fn, + called_fn_data, + &ctxt); + } + bool unknown_side_effects = false; bool terminate_path = false; @@ -1520,6 +1541,142 @@ exploded_node::on_stmt_post (const gimple *stmt, state->m_region_model->on_call_post (call, unknown_side_effects, ctxt); } +/* A concrete call_info subclass representing a replay of a call summary. */ + +class call_summary_edge_info : public call_info +{ +public: + call_summary_edge_info (const call_details &cd, + function *called_fn, + call_summary *summary, + const extrinsic_state &ext_state) + : call_info (cd), + m_called_fn (called_fn), + m_summary (summary), + m_ext_state (ext_state) + {} + + bool update_state (program_state *state, + const exploded_edge *, + region_model_context *ctxt) const final override + { + /* Update STATE based on summary_end_state. */ + call_details cd (get_call_details (state->m_region_model, ctxt)); + call_summary_replay r (cd, m_called_fn, m_summary, m_ext_state); + const program_state &summary_end_state = m_summary->get_state (); + return state->replay_call_summary (r, summary_end_state); + } + + bool update_model (region_model *model, + const exploded_edge *, + region_model_context *ctxt) const final override + { + /* Update STATE based on summary_end_state. */ + call_details cd (get_call_details (model, ctxt)); + call_summary_replay r (cd, m_called_fn, m_summary, m_ext_state); + const program_state &summary_end_state = m_summary->get_state (); + model->replay_call_summary (r, *summary_end_state.m_region_model); + return true; + } + + label_text get_desc (bool /*can_colorize*/) const final override + { + return m_summary->get_desc (); + } + +private: + function *m_called_fn; + call_summary *m_summary; + const extrinsic_state &m_ext_state; +}; + +/* Use PATH_CTXT to bifurcate, which when handled will add custom edges + for a replay of the various feasible summaries in CALLED_FN_DATA. */ + +exploded_node::on_stmt_flags +exploded_node::replay_call_summaries (exploded_graph &eg, + const supernode *snode, + const gcall *call_stmt, + program_state *state, + uncertainty_t *uncertainty, + path_context *path_ctxt, + function *called_fn, + per_function_data *called_fn_data, + region_model_context *ctxt) +{ + logger *logger = eg.get_logger (); + LOG_SCOPE (logger); + + gcc_assert (called_fn); + gcc_assert (called_fn_data); + + /* Each summary will call bifurcate on the PATH_CTXT. */ + for (auto summary : called_fn_data->m_summaries) + replay_call_summary (eg, snode, call_stmt, state, uncertainty, + path_ctxt, called_fn, summary, ctxt); + path_ctxt->terminate_path (); + + return on_stmt_flags (); +} + +/* Use PATH_CTXT to bifurcate, which when handled will add a + custom edge for a replay of SUMMARY, if the summary's + conditions are feasible based on the current state. */ + +void +exploded_node::replay_call_summary (exploded_graph &eg, + const supernode *snode, + const gcall *call_stmt, + program_state *old_state, + uncertainty_t *uncertainty, + path_context *path_ctxt, + function *called_fn, + call_summary *summary, + region_model_context *ctxt) +{ + logger *logger = eg.get_logger (); + LOG_SCOPE (logger); + gcc_assert (snode); + gcc_assert (call_stmt); + gcc_assert (old_state); + gcc_assert (called_fn); + gcc_assert (summary); + + if (logger) + logger->log ("using %s as summary for call to %qE from %qE", + summary->get_desc ().get (), + called_fn->decl, + snode->get_function ()->decl); + const extrinsic_state &ext_state = eg.get_ext_state (); + const program_state &summary_end_state = summary->get_state (); + if (logger) + { + pretty_printer *pp = logger->get_printer (); + + logger->start_log_line (); + pp_string (pp, "callsite state: "); + old_state->dump_to_pp (ext_state, true, false, pp); + logger->end_log_line (); + + logger->start_log_line (); + pp_string (pp, "summary end state: "); + summary_end_state.dump_to_pp (ext_state, true, false, pp); + logger->end_log_line (); + } + + program_state new_state (*old_state); + + call_details cd (call_stmt, new_state.m_region_model, ctxt); + call_summary_replay r (cd, called_fn, summary, ext_state); + + if (path_ctxt) + path_ctxt->bifurcate (new call_summary_edge_info (cd, + called_fn, + summary, + ext_state)); +} + + /* Consider the effect of following superedge SUCC from this node. Return true if it's feasible to follow the edge, or false @@ -2115,6 +2272,20 @@ stats::get_total_enodes () const return result; } +/* struct per_function_data. */ + +per_function_data::~per_function_data () +{ + for (auto iter : m_summaries) + delete iter; +} + +void +per_function_data::add_call_summary (exploded_node *node) +{ + m_summaries.safe_push (new call_summary (this, node)); +} + /* strongly_connected_components's ctor. Tarjan's SCC algorithm. */ strongly_connected_components:: @@ -3980,7 +4151,7 @@ exploded_graph::process_node (exploded_node *node) NULL, // uncertainty_t *uncertainty NULL, // path_context *path_ctxt stmt); - if (edge_info->update_model (bifurcated_new_state.m_region_model, + if (edge_info->update_state (&bifurcated_new_state, NULL, /* no exploded_edge yet. */ &bifurcation_ctxt)) { @@ -5350,24 +5521,17 @@ public: pretty_printer *pp = gv->get_pp (); dump_dot_id (pp); - pp_printf (pp, " [shape=none,margin=0,style=filled,fillcolor=%s,label=<", + pp_printf (pp, " [shape=none,margin=0,style=filled,fillcolor=%s,label=\"", "lightgrey"); - pp_string (pp, "<TABLE BORDER=\"0\">"); pp_write_text_to_stream (pp); - gv->begin_trtd (); pp_printf (pp, "VCG: %i: %s", m_index, function_name (m_fun)); - gv->end_tdtr (); pp_newline (pp); - gv->begin_trtd (); pp_printf (pp, "supernodes: %i\n", m_num_supernodes); - gv->end_tdtr (); pp_newline (pp); - gv->begin_trtd (); pp_printf (pp, "superedges: %i\n", m_num_superedges); - gv->end_tdtr (); pp_newline (pp); if (args.m_eg) @@ -5380,9 +5544,7 @@ public: if (enode->get_point ().get_function () == m_fun) num_enodes++; } - gv->begin_trtd (); pp_printf (pp, "enodes: %i\n", num_enodes); - gv->end_tdtr (); pp_newline (pp); // TODO: also show the per-callstring breakdown @@ -5404,11 +5566,8 @@ public: } if (num_enodes > 0) { - gv->begin_trtd (); cs->print (pp); pp_printf (pp, ": %i\n", num_enodes); - pp_write_text_as_html_like_dot_to_stream (pp); - gv->end_tdtr (); } } @@ -5417,14 +5576,20 @@ public: if (data) { pp_newline (pp); - gv->begin_trtd (); pp_printf (pp, "summaries: %i\n", data->m_summaries.length ()); - pp_write_text_as_html_like_dot_to_stream (pp); - gv->end_tdtr (); + for (auto summary : data->m_summaries) + { + pp_printf (pp, "\nsummary: %s:\n", summary->get_desc ().get ()); + const extrinsic_state &ext_state = args.m_eg->get_ext_state (); + const program_state &state = summary->get_state (); + state.dump_to_pp (ext_state, false, true, pp); + pp_newline (pp); + } } } - pp_string (pp, "</TABLE>>];\n\n"); + pp_write_text_as_dot_label_to_stream (pp, /*for_record=*/true); + pp_string (pp, "\"];\n\n"); pp_flush (pp); } diff --git a/gcc/analyzer/exploded-graph.h b/gcc/analyzer/exploded-graph.h index f957568..ea4a890 100644 --- a/gcc/analyzer/exploded-graph.h +++ b/gcc/analyzer/exploded-graph.h @@ -21,6 +21,15 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_ANALYZER_EXPLODED_GRAPH_H #define GCC_ANALYZER_EXPLODED_GRAPH_H +#include "alloc-pool.h" +#include "fibonacci_heap.h" +#include "supergraph.h" +#include "sbitmap.h" +#include "shortest-paths.h" +#include "analyzer/sm.h" +#include "analyzer/program-state.h" +#include "analyzer/diagnostic-manager.h" + namespace ana { /* Concrete implementation of region_model_context, wiring it up to the @@ -258,6 +267,25 @@ class exploded_node : public dnode<eg_traits> bool unknown_side_effects, region_model_context *ctxt); + on_stmt_flags replay_call_summaries (exploded_graph &eg, + const supernode *snode, + const gcall *call_stmt, + program_state *state, + uncertainty_t *uncertainty, + path_context *path_ctxt, + function *called_fn, + per_function_data *called_fn_data, + region_model_context *ctxt); + void replay_call_summary (exploded_graph &eg, + const supernode *snode, + const gcall *call_stmt, + program_state *state, + uncertainty_t *uncertainty, + path_context *path_ctxt, + function *called_fn, + call_summary *summary, + region_model_context *ctxt); + bool on_edge (exploded_graph &eg, const superedge *succ, program_point *next_point, @@ -611,13 +639,11 @@ struct per_call_string_data struct per_function_data { per_function_data () {} + ~per_function_data (); - void add_call_summary (exploded_node *node) - { - m_summaries.safe_push (node); - } + void add_call_summary (exploded_node *node); - auto_vec<exploded_node *> m_summaries; + auto_vec<call_summary *> m_summaries; }; diff --git a/gcc/analyzer/known-function-manager.h b/gcc/analyzer/known-function-manager.h index fbde853..2b95b7e 100644 --- a/gcc/analyzer/known-function-manager.h +++ b/gcc/analyzer/known-function-manager.h @@ -21,6 +21,8 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_ANALYZER_KNOWN_FUNCTION_MANAGER_H #define GCC_ANALYZER_KNOWN_FUNCTION_MANAGER_H +#include "analyzer/analyzer-logging.h" + namespace ana { class known_function_manager : public log_user diff --git a/gcc/analyzer/program-point.h b/gcc/analyzer/program-point.h index 63f7224..f72b86d 100644 --- a/gcc/analyzer/program-point.h +++ b/gcc/analyzer/program-point.h @@ -21,6 +21,9 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_ANALYZER_PROGRAM_POINT_H #define GCC_ANALYZER_PROGRAM_POINT_H +#include "pretty-print.h" +#include "analyzer/call-string.h" + namespace ana { class exploded_graph; diff --git a/gcc/analyzer/program-state.cc b/gcc/analyzer/program-state.cc index f0f4046..b54bdce 100644 --- a/gcc/analyzer/program-state.cc +++ b/gcc/analyzer/program-state.cc @@ -56,6 +56,7 @@ along with GCC; see the file COPYING3. If not see #include "analyzer/program-state.h" #include "analyzer/exploded-graph.h" #include "analyzer/state-purge.h" +#include "analyzer/call-summary.h" #include "analyzer/analyzer-selftests.h" #if ENABLE_ANALYZER @@ -743,6 +744,31 @@ program_state::program_state (const extrinsic_state &ext_state) } } +/* Attempt to to use R to replay SUMMARY into this object. + Return true if it is possible. */ + +bool +sm_state_map::replay_call_summary (call_summary_replay &r, + const sm_state_map &summary) +{ + for (auto kv : summary.m_map) + { + const svalue *summary_sval = kv.first; + const svalue *caller_sval = r.convert_svalue_from_summary (summary_sval); + if (!caller_sval) + continue; + const svalue *summary_origin = kv.second.m_origin; + const svalue *caller_origin + = (summary_origin + ? r.convert_svalue_from_summary (summary_origin) + : NULL); + // caller_origin can be NULL. + m_map.put (caller_sval, entry_t (kv.second.m_state, caller_origin)); + } + m_global_state = summary.m_global_state; + return true; +} + /* program_state's copy ctor. */ program_state::program_state (const program_state &other) @@ -1437,6 +1463,28 @@ program_state::detect_leaks (const program_state &src_state, dest_state.m_region_model->unset_dynamic_extents (reg); } +/* Attempt to to use R to replay SUMMARY into this object. + Return true if it is possible. */ + +bool +program_state::replay_call_summary (call_summary_replay &r, + const program_state &summary) +{ + if (!m_region_model->replay_call_summary (r, *summary.m_region_model)) + return false; + + for (unsigned sm_idx = 0; sm_idx < m_checker_states.length (); sm_idx++) + { + const sm_state_map *summary_sm_map = summary.m_checker_states[sm_idx]; + m_checker_states[sm_idx]->replay_call_summary (r, *summary_sm_map); + } + + if (!summary.m_valid) + m_valid = false; + + return true; +} + /* Handle calls to "__analyzer_dump_state". */ void diff --git a/gcc/analyzer/program-state.h b/gcc/analyzer/program-state.h index baab787..ad40578 100644 --- a/gcc/analyzer/program-state.h +++ b/gcc/analyzer/program-state.h @@ -171,6 +171,9 @@ public: static const svalue * canonicalize_svalue (const svalue *sval, const extrinsic_state &ext_state); + bool replay_call_summary (call_summary_replay &r, + const sm_state_map &summary); + private: const state_machine &m_sm; map_t m_map; @@ -273,6 +276,9 @@ public: const extrinsic_state &ext_state, region_model_context *ctxt); + bool replay_call_summary (call_summary_replay &r, + const program_state &summary); + void impl_call_analyzer_dump_state (const gcall *call, const extrinsic_state &ext_state, region_model_context *ctxt); diff --git a/gcc/analyzer/region-model-manager.cc b/gcc/analyzer/region-model-manager.cc index 1956cfc..9a2ee08 100644 --- a/gcc/analyzer/region-model-manager.cc +++ b/gcc/analyzer/region-model-manager.cc @@ -1275,6 +1275,33 @@ get_or_create_asm_output_svalue (tree type, return asm_output_sval; } +/* Return the svalue * of type TYPE for OUTPUT_IDX of a deterministic + asm stmt with string ASM_STRING with NUM_OUTPUTS outputs, given + INPUTS as inputs. */ + +const svalue * +region_model_manager:: +get_or_create_asm_output_svalue (tree type, + const char *asm_string, + unsigned output_idx, + unsigned num_outputs, + const vec<const svalue *> &inputs) +{ + gcc_assert (inputs.length () <= asm_output_svalue::MAX_INPUTS); + + if (const svalue *folded + = maybe_fold_asm_output_svalue (type, inputs)) + return folded; + + asm_output_svalue::key_t key (type, asm_string, output_idx, inputs); + if (asm_output_svalue **slot = m_asm_output_values_map.get (key)) + return *slot; + asm_output_svalue *asm_output_sval + = new asm_output_svalue (type, asm_string, output_idx, num_outputs, inputs); + RETURN_UNKNOWN_IF_TOO_COMPLEX (asm_output_sval); + m_asm_output_values_map.put (key, asm_output_sval); + return asm_output_sval; +} /* Return the svalue * of type TYPE for the result of a call to FNDECL with __attribute__((const)), given INPUTS as inputs. */ diff --git a/gcc/analyzer/region-model-manager.h b/gcc/analyzer/region-model-manager.h index 0057326..3d8f76e 100644 --- a/gcc/analyzer/region-model-manager.h +++ b/gcc/analyzer/region-model-manager.h @@ -81,6 +81,12 @@ public: unsigned output_idx, const vec<const svalue *> &inputs); const svalue * + get_or_create_asm_output_svalue (tree type, + const char *asm_string, + unsigned output_idx, + unsigned num_outputs, + const vec<const svalue *> &inputs); + const svalue * get_or_create_const_fn_result_svalue (tree type, tree fndecl, const vec<const svalue *> &inputs); diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index e92bba2..d14f3a1 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -66,6 +66,7 @@ along with GCC; see the file COPYING3. If not see #include "analyzer/region-model-reachability.h" #include "analyzer/analyzer-selftests.h" #include "analyzer/program-state.h" +#include "analyzer/call-summary.h" #include "stor-layout.h" #include "attribs.h" #include "tree-object-size.h" @@ -5038,11 +5039,8 @@ region_model::maybe_update_for_edge (const superedge &edge, break; case SUPEREDGE_INTRAPROCEDURAL_CALL: - { - const callgraph_superedge *cg_sedge - = as_a <const callgraph_superedge *> (&edge); - update_for_call_summary (*cg_sedge, ctxt); - } + /* This is a no-op for call summaries; we should already + have handled the effect of the call summary at the call stmt. */ break; } @@ -5140,25 +5138,34 @@ region_model::update_for_return_superedge (const return_superedge &return_edge, update_for_return_gcall (call_stmt, ctxt); } -/* Update this region_model with a summary of the effect of calling - and returning from CG_SEDGE. +/* Attempt to to use R to replay SUMMARY into this object. + Return true if it is possible. */ - TODO: Currently this is extremely simplistic: we merely set the - return value to "unknown". A proper implementation would e.g. update - sm-state, and presumably be reworked to support multiple outcomes. */ - -void -region_model::update_for_call_summary (const callgraph_superedge &cg_sedge, - region_model_context *ctxt) +bool +region_model::replay_call_summary (call_summary_replay &r, + const region_model &summary) { - /* For now, set any return value to "unknown". */ - const gcall *call_stmt = cg_sedge.get_call_stmt (); - tree lhs = gimple_call_lhs (call_stmt); - if (lhs) - mark_region_as_unknown (get_lvalue (lhs, ctxt), - ctxt ? ctxt->get_uncertainty () : NULL); + gcc_assert (summary.get_stack_depth () == 1); + + m_store.replay_call_summary (r, summary.m_store); - // TODO: actually implement some kind of summary here + if (!m_constraints->replay_call_summary (r, *summary.m_constraints)) + return false; + + for (auto kv : summary.m_dynamic_extents) + { + const region *summary_reg = kv.first; + const region *caller_reg = r.convert_region_from_summary (summary_reg); + if (!caller_reg) + continue; + const svalue *summary_sval = kv.second; + const svalue *caller_sval = r.convert_svalue_from_summary (summary_sval); + if (!caller_sval) + continue; + m_dynamic_extents.put (caller_reg, caller_sval); + } + + return true; } /* Given a true or false edge guarded by conditional statement COND_STMT, diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index be0a97a..6903090 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -529,6 +529,9 @@ class region_model const svalue *get_string_size (const svalue *sval) const; const svalue *get_string_size (const region *reg) const; + bool replay_call_summary (call_summary_replay &r, + const region_model &summary); + void maybe_complain_about_infoleak (const region *dst_reg, const svalue *copied_sval, const region *src_reg, @@ -570,8 +573,6 @@ class region_model region_model_context *ctxt); void update_for_return_superedge (const return_superedge &return_edge, region_model_context *ctxt); - void update_for_call_summary (const callgraph_superedge &cg_sedge, - region_model_context *ctxt); bool apply_constraints_for_gcond (const cfg_superedge &edge, const gcond *cond_stmt, region_model_context *ctxt, diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc index 1857d95..74b481d 100644 --- a/gcc/analyzer/store.cc +++ b/gcc/analyzer/store.cc @@ -56,6 +56,7 @@ along with GCC; see the file COPYING3. If not see #include "analyzer/program-point.h" #include "analyzer/store.h" #include "analyzer/region-model.h" +#include "analyzer/call-summary.h" #include "analyzer/analyzer-selftests.h" #include "stor-layout.h" @@ -3130,6 +3131,166 @@ store::loop_replay_fixup (const store *other_store, } } +/* Use R to replay the bindings from SUMMARY into this object. */ + +void +store::replay_call_summary (call_summary_replay &r, + const store &summary) +{ + if (summary.m_called_unknown_fn) + { + /* A call to an external function occurred in the summary. + Hence we need to invalidate our knownledge of globals, + escaped regions, etc. */ + on_unknown_fncall (r.get_call_stmt (), + r.get_store_manager (), + conjured_purge (r.get_caller_model (), + r.get_ctxt ())); + } + + auto_vec<const region *> keys (summary.m_cluster_map.elements ()); + for (auto kv : summary.m_cluster_map) + keys.quick_push (kv.first); + keys.qsort (region::cmp_ptr_ptr); + for (auto base_reg : keys) + replay_call_summary_cluster (r, summary, base_reg); +} + +/* Use R and SUMMARY to replay the bindings in SUMMARY_CLUSTER + into this object. */ + +void +store::replay_call_summary_cluster (call_summary_replay &r, + const store &summary, + const region *summary_base_reg) +{ + const call_details &cd = r.get_call_details (); + region_model_manager *reg_mgr = r.get_manager (); + store_manager *mgr = reg_mgr->get_store_manager (); + const binding_cluster *summary_cluster + = summary.get_cluster (summary_base_reg); + + /* Handle "ESCAPED" and "TOUCHED" flags. */ + if (summary_cluster->escaped_p () || summary_cluster->touched_p ()) + if (const region *caller_reg + = r.convert_region_from_summary (summary_base_reg)) + { + const region *caller_base_reg = caller_reg->get_base_region (); + binding_cluster *caller_cluster + = get_or_create_cluster (caller_base_reg); + if (summary_cluster->escaped_p ()) + caller_cluster->mark_as_escaped (); + if (summary_cluster->touched_p ()) + caller_cluster->m_touched = true; + } + + switch (summary_base_reg->get_kind ()) + { + /* Top-level regions. */ + case RK_FRAME: + case RK_GLOBALS: + case RK_CODE: + case RK_STACK: + case RK_HEAP: + case RK_ROOT: + /* Child regions. */ + case RK_FIELD: + case RK_ELEMENT: + case RK_OFFSET: + case RK_SIZED: + case RK_CAST: + case RK_BIT_RANGE: + /* Other regions. */ + case RK_VAR_ARG: + case RK_UNKNOWN: + /* These should never be the base region of a binding cluster. */ + gcc_unreachable (); + break; + + case RK_FUNCTION: + case RK_LABEL: + case RK_STRING: + /* These can be marked as escaping. */ + break; + + case RK_SYMBOLIC: + { + const symbolic_region *summary_symbolic_reg + = as_a <const symbolic_region *> (summary_base_reg); + const svalue *summary_ptr_sval = summary_symbolic_reg->get_pointer (); + const svalue *caller_ptr_sval + = r.convert_svalue_from_summary (summary_ptr_sval); + if (!caller_ptr_sval) + return; + const region *caller_dest_reg + = cd.get_model ()->deref_rvalue (caller_ptr_sval, + NULL_TREE, + cd.get_ctxt ()); + const svalue *summary_sval + = summary.get_any_binding (mgr, summary_base_reg); + if (!summary_sval) + return; + const svalue *caller_sval + = r.convert_svalue_from_summary (summary_sval); + if (!caller_sval) + caller_sval = + reg_mgr->get_or_create_unknown_svalue (summary_sval->get_type ()); + set_value (mgr, caller_dest_reg, + caller_sval, NULL /* uncertainty_t * */); + } + break; + case RK_DECL: + { + const region *caller_dest_reg + = r.convert_region_from_summary (summary_base_reg); + if (!caller_dest_reg) + return; + const svalue *summary_sval + = summary.get_any_binding (mgr, summary_base_reg); + const svalue *caller_sval + = r.convert_svalue_from_summary (summary_sval); + if (!caller_sval) + caller_sval = + reg_mgr->get_or_create_unknown_svalue (summary_sval->get_type ()); + set_value (mgr, caller_dest_reg, + caller_sval, NULL /* uncertainty_t * */); + } + break; + case RK_HEAP_ALLOCATED: + { + const region *caller_dest_reg + = r.convert_region_from_summary (summary_base_reg); + gcc_assert (caller_dest_reg); + binding_cluster *caller_cluster + = get_or_create_cluster (caller_dest_reg); + auto_vec <const binding_key *> summary_keys; + for (auto kv : *summary_cluster) + summary_keys.safe_push (kv.first); + summary_keys.qsort (binding_key::cmp_ptrs); + for (auto summary_key : summary_keys) + { + const binding_key *caller_key + = r.convert_key_from_summary (summary_key); + if (!caller_key) + continue; + const svalue *summary_sval + = summary_cluster->get_map ().get (summary_key); + const svalue *caller_sval + = r.convert_svalue_from_summary (summary_sval); + if (!caller_sval) + caller_sval = reg_mgr->get_or_create_unknown_svalue + (summary_sval->get_type ()); + caller_cluster->bind_key (caller_key, caller_sval); + } + } + break; + + case RK_ALLOCA: + /* Ignore bindings of alloca regions in the summary. */ + break; + } +} + #if CHECKING_P namespace selftest { diff --git a/gcc/analyzer/store.h b/gcc/analyzer/store.h index d172ee7..0b5cbd6 100644 --- a/gcc/analyzer/store.h +++ b/gcc/analyzer/store.h @@ -21,6 +21,8 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_ANALYZER_STORE_H #define GCC_ANALYZER_STORE_H +#include "tristate.h" + /* Implementation of the region-based ternary model described in: "A Memory Model for Static Analysis of C Programs" (Zhongxing Xu, Ted Kremenek, and Jian Zhang) @@ -418,6 +420,14 @@ private: } // namespace ana +template <> +template <> +inline bool +is_a_helper <const ana::concrete_binding *>::test (const ana::binding_key *key) +{ + return key->concrete_p (); +} + template <> struct default_hash_traits<ana::concrete_binding> : public member_function_hash_traits<ana::concrete_binding> { @@ -786,6 +796,12 @@ public: void loop_replay_fixup (const store *other_store, region_model_manager *mgr); + void replay_call_summary (call_summary_replay &r, + const store &summary); + void replay_call_summary_cluster (call_summary_replay &r, + const store &summary, + const region *base_reg); + private: void remove_overlapping_bindings (store_manager *mgr, const region *reg, uncertainty_t *uncertainty); diff --git a/gcc/analyzer/supergraph.cc b/gcc/analyzer/supergraph.cc index 01e30f7..a4a495a 100644 --- a/gcc/analyzer/supergraph.cc +++ b/gcc/analyzer/supergraph.cc @@ -62,7 +62,7 @@ namespace ana { /* Get the function of the ultimate alias target being called at EDGE, if any. */ -static function * +function * get_ultimate_function_for_cgraph_edge (cgraph_edge *edge) { cgraph_node *ultimate_node = edge->callee->ultimate_alias_target (); @@ -74,12 +74,13 @@ get_ultimate_function_for_cgraph_edge (cgraph_edge *edge) /* Get the cgraph_edge, but only if there's an underlying function body. */ cgraph_edge * -supergraph_call_edge (function *fun, gimple *stmt) +supergraph_call_edge (function *fun, const gimple *stmt) { - gcall *call = dyn_cast<gcall *> (stmt); + const gcall *call = dyn_cast<const gcall *> (stmt); if (!call) return NULL; - cgraph_edge *edge = cgraph_node::get (fun->decl)->get_edge (stmt); + cgraph_edge *edge + = cgraph_node::get (fun->decl)->get_edge (const_cast <gimple *> (stmt)); if (!edge) return NULL; if (!edge->callee) diff --git a/gcc/analyzer/supergraph.h b/gcc/analyzer/supergraph.h index e9a5be2..f66058c 100644 --- a/gcc/analyzer/supergraph.h +++ b/gcc/analyzer/supergraph.h @@ -21,6 +21,13 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_ANALYZER_SUPERGRAPH_H #define GCC_ANALYZER_SUPERGRAPH_H +#include "ordered-hash-map.h" +#include "cfg.h" +#include "basic-block.h" +#include "gimple.h" +#include "gimple-iterator.h" +#include "digraph.h" + using namespace ana; namespace ana { @@ -605,7 +612,8 @@ class dot_annotator } }; -extern cgraph_edge *supergraph_call_edge (function *fun, gimple *stmt); +extern cgraph_edge *supergraph_call_edge (function *fun, const gimple *stmt); +extern function *get_ultimate_function_for_cgraph_edge (cgraph_edge *edge); } // namespace ana diff --git a/gcc/analyzer/svalue.cc b/gcc/analyzer/svalue.cc index a37c152..d8d419a 100644 --- a/gcc/analyzer/svalue.cc +++ b/gcc/analyzer/svalue.cc @@ -1728,13 +1728,17 @@ unmergeable_svalue::implicitly_live_p (const svalue_set *live_svalues, compound_svalue::compound_svalue (tree type, const binding_map &map) : svalue (calc_complexity (map), type), m_map (map) { - /* All keys within the underlying binding_map are required to be concrete, - not symbolic. */ #if CHECKING_P for (iterator_t iter = begin (); iter != end (); ++iter) { + /* All keys within the underlying binding_map are required to be concrete, + not symbolic. */ const binding_key *key = (*iter).first; gcc_assert (key->concrete_p ()); + + /* We don't nest compound svalues. */ + const svalue *sval = (*iter).second; + gcc_assert (sval->get_kind () != SK_COMPOUND); } #endif } diff --git a/gcc/analyzer/svalue.h b/gcc/analyzer/svalue.h index 9393d6e..9d0f04d 100644 --- a/gcc/analyzer/svalue.h +++ b/gcc/analyzer/svalue.h @@ -21,7 +21,10 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_ANALYZER_SVALUE_H #define GCC_ANALYZER_SVALUE_H +#include "json.h" #include "analyzer/complexity.h" +#include "analyzer/store.h" +#include "analyzer/program-point.h" using namespace ana; @@ -1527,6 +1530,7 @@ public: const char *get_asm_string () const { return m_asm_string; } unsigned get_output_idx () const { return m_output_idx; } + unsigned get_num_outputs () const { return m_num_outputs; } unsigned get_num_inputs () const { return m_num_inputs; } const svalue *get_input (unsigned idx) const { return m_input_arr[idx]; } diff --git a/gcc/testsuite/gcc.dg/analyzer/call-summaries-2.c b/gcc/testsuite/gcc.dg/analyzer/call-summaries-2.c new file mode 100644 index 0000000..0aaf67b --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/call-summaries-2.c @@ -0,0 +1,646 @@ +/* { dg-additional-options "-fanalyzer-call-summaries --param analyzer-min-snodes-for-call-summary=0" } */ + +/* There need to be at least two calls to a function for the + call-summarization code to be used. + TODO: add some kind of test that summarization *was* used. */ + +#include <stdlib.h> +#include "analyzer-decls.h" + +extern int external_fn (void *); + +int returns_const (void) +{ + return 42; +} + +void test_summarized_returns_const (void) +{ + __analyzer_eval (returns_const () == 42); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_const () == 42); /* { dg-warning "TRUE" } */ +} + +void test_summarized_returns_const_2 (void) +{ + returns_const (); /* { dg-message "when 'returns_const' returns" } */ + __analyzer_dump_path (); /* { dg-message "path" } */ +} + +int returns_param (int i) +{ + return i; +} + +void test_summarized_returns_param (int j) +{ + __analyzer_eval (returns_param (j) == j); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_param (j) == j); /* { dg-warning "TRUE" } */ +} + +void writes_const_to_ptr (int *p) +{ + *p = 42; +} + +void test_summarized_writes_const_to_ptr (void) +{ + int i, j; + writes_const_to_ptr (&i); + writes_const_to_ptr (&j); + __analyzer_eval (i == 42); /* { dg-warning "TRUE" } */ + __analyzer_eval (j == 42); /* { dg-warning "TRUE" } */ +} + +// TODO: we should complain about this: + +void test_summarized_write_through_null (void) +{ + writes_const_to_ptr (NULL); +} + +void writes_param_to_ptr (int i, int *p) +{ + *p = i; +} + +void test_summarized_writes_param_to_ptr (int j) +{ + int x, y; + writes_param_to_ptr (j, &x); + writes_param_to_ptr (j, &y); + __analyzer_eval (x == j); /* { dg-warning "TRUE" } */ + __analyzer_eval (y == j); /* { dg-warning "TRUE" } */ +} + +int g; + +void writes_to_global (int i) +{ + g = i; +} + +void test_writes_to_global (int x, int y) +{ + writes_to_global (x); + __analyzer_eval (g == x); /* { dg-warning "TRUE" } */ + + writes_to_global (y); + __analyzer_eval (g == y); /* { dg-warning "TRUE" } */ +} + +int reads_from_global (void) +{ + return g; +} + +void test_reads_from_global (int x, int y) +{ + g = x; + __analyzer_eval (reads_from_global () == x); /* { dg-warning "TRUE" } */ + + g = y; + __analyzer_eval (reads_from_global () == y); /* { dg-warning "TRUE" } */ +} + +/* Example of a unary op. */ + +int returns_negation (int i) +{ + return -i; +} + +void test_returns_negation (int x) +{ + __analyzer_eval (returns_negation (5) == -5); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_negation (x) == -x); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_negation (-x) == x); /* { dg-warning "TRUE" } */ +} + +/* Example of a binary op. */ + +int returns_sum (int i, int j) +{ + return i + j; +} + +void test_returns_sum (int x, int y) +{ + __analyzer_eval (returns_sum (5, 3) == 8); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_sum (7, 2) == 9); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_sum (x, y) == x + y); /* { dg-warning "TRUE" } */ +} + +struct coord +{ + int x; + int y; +}; + +struct coord make_coord (int x, int y) +{ + struct coord result = {x, y}; + return result; +} + +void test_make_coord (int i, int j) +{ + struct coord c1 = make_coord (3, 4); + __analyzer_eval (c1.x == 3); /* { dg-warning "TRUE" } */ + __analyzer_eval (c1.y == 4); /* { dg-warning "TRUE" } */ + + struct coord c2 = make_coord (i, j); + __analyzer_eval (c2.x == i); /* { dg-warning "TRUE" } */ + __analyzer_eval (c2.y == j); /* { dg-warning "TRUE" } */ +} + +/* Example of nested usage of summaries. */ + +struct rect +{ + struct coord nw; + struct coord se; +}; + +struct rect make_rect (int top, int bottom, int left, int right) +{ + struct rect result = {make_coord (left, top), + make_coord (right, bottom)}; + return result; +} + +void test_make_rect (int top, int bottom, int left, int right) +{ + struct rect r1 = make_rect (3, 4, 5, 6); + __analyzer_eval (r1.nw.y == 3); /* { dg-warning "TRUE" } */ + __analyzer_eval (r1.se.y == 4); /* { dg-warning "TRUE" } */ + __analyzer_eval (r1.nw.x == 5); /* { dg-warning "TRUE" } */ + __analyzer_eval (r1.se.x == 6); /* { dg-warning "TRUE" } */ + + struct rect r2 = make_rect (top, bottom, left, right); + __analyzer_eval (r2.nw.y == top); /* { dg-warning "TRUE" } */ + __analyzer_eval (r2.se.y == bottom); /* { dg-warning "TRUE" } */ + __analyzer_eval (r2.nw.x == left); /* { dg-warning "TRUE" } */ + __analyzer_eval (r2.se.x == right); /* { dg-warning "TRUE" } */ +} + +const char *returns_str (void) +{ + return "abc"; +} + +void test_returns_str (void) +{ + __analyzer_eval (returns_str () != NULL); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_str ()[0] == 'a'); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_str ()[1] == 'b'); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_str ()[2] == 'c'); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_str ()[3] == '\0'); /* { dg-warning "TRUE" } */ +} + +int returns_field (struct coord *p) +{ + return p->y; +} + +void test_returns_field (struct coord *q) +{ + __analyzer_eval (returns_field (q) == q->y); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_field (q) == q->y); /* { dg-warning "TRUE" } */ +} + +void writes_to_fields (struct coord *p, int x, int y) +{ + p->x = x; + p->y = y; +} + +void test_writes_to_field (struct coord *q, int qx, int qy) +{ + struct coord a, b; + writes_to_fields (&a, 1, 2); + __analyzer_eval (a.x == 1); /* { dg-warning "TRUE" } */ + __analyzer_eval (a.y == 2); /* { dg-warning "TRUE" } */ + writes_to_fields (&b, 3, 4); + __analyzer_eval (b.x == 3); /* { dg-warning "TRUE" } */ + __analyzer_eval (b.y == 4); /* { dg-warning "TRUE" } */ + writes_to_fields (q, qx, qy); + __analyzer_eval (q->x == qx); /* { dg-warning "TRUE" } */ + __analyzer_eval (q->y == qy); /* { dg-warning "TRUE" } */ +} + +/* Example of nested function summarization. */ + +int get_min_x (struct rect *p) +{ + return p->nw.x; +} + +int get_min_y (struct rect *p) +{ + return p->nw.y; +} + +int get_max_x (struct rect *p) +{ + return p->se.x; +} + +int get_max_y (struct rect *p) +{ + return p->se.y; +} + +int get_area (struct rect *p) +{ + return ((get_max_x (p) - get_min_x (p)) + * (get_max_y (p) - get_min_y (p))); +} + +void test_get_area (int top, int bottom, int left, int right, struct rect *p) +{ + { + /* 1x1 at origin. */ + struct rect a = make_rect (0, 1, 0, 1); + __analyzer_eval (get_min_y (&a) == 0); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_max_y (&a) == 1); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_min_x (&a) == 0); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_max_x (&a) == 1); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_area (&a) == 1); /* { dg-warning "TRUE" } */ + } + + { + /* 4x5. */ + struct rect b = make_rect (3, 7, 4, 9); + __analyzer_eval (get_min_y (&b) == 3); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_max_y (&b) == 7); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_min_x (&b) == 4); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_max_x (&b) == 9); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_area (&b) == 20); /* { dg-warning "TRUE" } */ + } + + { + /* Symbolic. */ + struct rect c = make_rect (top, bottom, left, right); + __analyzer_eval (get_min_y (&c) == top); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_max_y (&c) == bottom); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_min_x (&c) == left); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_max_x (&c) == right); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_area (&c) == ((right - left) * (bottom - top))); /* { dg-warning "TRUE" } */ + } + + /* Symbolic via ptr. */ + __analyzer_eval (get_min_y (p) == p->nw.y); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_max_y (p) == p->se.y); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_min_x (p) == p->nw.x); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_max_x (p) == p->se.x); /* { dg-warning "TRUE" } */ + __analyzer_eval (get_area (p) == ((p->se.x - p->nw.x) * (p->se.y - p->nw.y))); /* { dg-warning "TRUE" } */ +} + +int returns_element (int i) +{ + static const int arr[3] = {7, 8, 9}; + return arr[i]; +} + +void test_returns_element (int j) +{ + __analyzer_eval (returns_element (0) == 7); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (returns_element (1) == 8); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (returns_element (2) == 9); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (returns_element (3) == 10); /* { dg-warning "UNKNOWN" } */ + // TODO: out of bounds +} + +const int *returns_element_ptr (int i) +{ + static const int arr[3] = {7, 8, 9}; + return &arr[i]; +} + +int test_returns_element_ptr (int j) +{ + __analyzer_eval (*returns_element_ptr (0) == 7); /* { dg-warning "TRUE" } */ + __analyzer_eval (*returns_element_ptr (1) == 8); /* { dg-warning "TRUE" } */ + __analyzer_eval (*returns_element_ptr (2) == 9); /* { dg-warning "TRUE" } */ + return *returns_element_ptr (3); /* { dg-warning "buffer overread" } */ +} + +int returns_offset (int arr[3], int i) +{ + return arr[i]; +} + +void test_returns_offset (int outer_arr[3], int idx) +{ + int a[3] = {4, 5, 6}; + __analyzer_eval (returns_offset (a, 0) == 4); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_offset (a, 1) == 5); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_offset (a, 2) == 6); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_offset (a, idx) == a[idx]); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (returns_offset (outer_arr, 0) == outer_arr[0]); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_offset (outer_arr, idx) == outer_arr[idx]); /* { dg-warning "TRUE" } */ +} + +int returns_offset_2 (int arr[], int i) +{ + return arr[i]; +} + +void test_returns_offset_2 (int outer_arr[], int idx) +{ + int a[3] = {4, 5, 6}; + __analyzer_eval (returns_offset_2 (a, 0) == 4); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_offset_2 (a, 1) == 5); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_offset_2 (a, 2) == 6); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_offset_2 (a, idx) == a[idx]); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (returns_offset_2 (outer_arr, 0) == outer_arr[0]); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_offset_2 (outer_arr, idx) == outer_arr[idx]); /* { dg-warning "TRUE" } */ +} + +int returns_offset_3 (int *p, int i) +{ + return p[i]; +} + +void test_returns_offset_3 (int *q, int j) +{ + __analyzer_eval (returns_offset_3 (q, j) == q[j]); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_offset_3 (q, j) == q[j]); /* { dg-warning "TRUE" } */ +} + +/* With state merging, this is summarized as returning "UNKNOWN". */ + +int two_outcomes (int flag, int x, int y) +{ + if (flag) + return x; + else + return y; +} + +void test_two_outcomes (int outer_flag, int a, int b) +{ + int r; + __analyzer_eval (two_outcomes (1, a, b) == a); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (two_outcomes (0, a, b) == b); /* { dg-warning "UNKNOWN" } */ + r = two_outcomes (outer_flag, a, b); + if (outer_flag) + __analyzer_eval (r == a); /* { dg-warning "UNKNOWN" } */ + else + __analyzer_eval (r == b); /* { dg-warning "UNKNOWN" } */ +} + +/* Verify that summary replays capture postconditions. */ + +void check_int_nonnegative (int i) +{ + if (i < 0) + __builtin_unreachable (); +} + +void test_check_int_nonnegative (int i, int j) +{ + __analyzer_eval (i >= 0); /* { dg-warning "UNKNOWN" } */ + check_int_nonnegative (i); + __analyzer_eval (i >= 0); /* { dg-warning "TRUE" } */ + + __analyzer_eval (j >= 0); /* { dg-warning "UNKNOWN" } */ + check_int_nonnegative (j); + __analyzer_eval (j >= 0); /* { dg-warning "TRUE" } */ +} + +void calls_external_fn (void) +{ + external_fn (NULL); +} + +void test_calls_external_fn (void) +{ + g = 1; + __analyzer_eval (g == 1); /* { dg-warning "TRUE" } */ + calls_external_fn (); + calls_external_fn (); + __analyzer_eval (g == 1); /* { dg-warning "UNKNOWN" "expected" { xfail *-*-* } } */ + /* { dg-bogus "TRUE" "bogus" { xfail *-*-* } .-1 } */ + // TODO(xfails) +} + +int returns_iterator (int n) +{ + int i; + for (i = 0; i < n; i++) + { + } + return i; +} + +void test_returns_iterator (int j, int k) +{ + __analyzer_eval (returns_iterator (j) == j); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (returns_iterator (k) == k); /* { dg-warning "UNKNOWN" } */ + /* TODO: ideally we'd capture these equalities, but this is an issue + with how we handle loops. */ +} + +int returns_external_result (void) +{ + return external_fn (NULL); +} + +int test_returns_external_result (void) +{ + int i, j; + i = returns_external_result (); + j = returns_external_result (); + __analyzer_eval (i == j); /* { dg-warning "UNKNOWN" } */ + return i * j; +} + +int uses_alloca (int i) +{ + int *p = alloca (sizeof (int)); + *p = i; + return *p; +} + +void test_uses_alloca (int x) +{ + __analyzer_eval (uses_alloca (42) == 42); /* { dg-warning "TRUE" } */ + __analyzer_eval (uses_alloca (x) == x); /* { dg-warning "TRUE" } */ +} + +struct bits +{ + unsigned b0 : 1; + unsigned b1 : 1; + unsigned b2 : 1; + unsigned b3 : 1; + unsigned b4 : 1; + unsigned b5 : 1; + unsigned b6 : 1; + unsigned b7 : 1; +}; + +int returns_bitfield (struct bits b) +{ + return b.b3; +} + +void test_returns_bitfield (struct bits s) +{ + __analyzer_eval (returns_bitfield (s) == s.b3); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (returns_bitfield (s) == s.b3); /* { dg-warning "UNKNOWN" } */ + // TODO: ideally it would figure out that these are equal +} + +int consume_two_ints_from_va_list (__builtin_va_list ap) +{ + int i, j; + i = __builtin_va_arg (ap, int); + j = __builtin_va_arg (ap, int); + return i * j; +} + +int test_consume_two_ints_from_va_list (__builtin_va_list ap1) +{ + int p1, p2; + __builtin_va_list ap2; + __builtin_va_copy (ap2, ap1); + p1 = consume_two_ints_from_va_list (ap1); + p2 = consume_two_ints_from_va_list (ap2); + __analyzer_eval (p1 == p2); /* { dg-warning "UNKNOWN" } */ + // TODO: ideally it would figure out these are equal + __builtin_va_end (ap2); +} + +int consume_two_ints_from_varargs (int placeholder, ...) +{ + int i, j; + __builtin_va_list ap; + __builtin_va_start (ap, placeholder); + i = __builtin_va_arg (ap, int); + j = __builtin_va_arg (ap, int); + __builtin_va_end (ap); + return i * j; +} + +void test_consume_two_ints_from_varargs (int x, int y) +{ + __analyzer_eval (consume_two_ints_from_varargs (0, 4, 5) == 20); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (consume_two_ints_from_varargs (0, 3, 6) == 18); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (consume_two_ints_from_varargs (0, x, 6) == x * y); /* { dg-warning "UNKNOWN" } */ + // TODO: ideally it would figure these out +} + +extern int const_fn_1 (int) __attribute__ ((const)); +int calls_const_fn (int i) +{ + return const_fn_1 (i); +} + +void test_calls_const_fn (int x) +{ + int r1, r2; + r1 = calls_const_fn (x); + r2 = calls_const_fn (x); + __analyzer_eval (r1 == r2); /* { dg-warning "TRUE" } */ +} + +typedef struct iv2 { int arr[2]; } iv2_t; +typedef struct iv4 { int arr[4]; } iv4_t; + +iv2_t returns_iv2_t (int x, int y) +{ + iv2_t result = {x, y}; + return result; +} + +void test_returns_iv2_t (int a, int b) +{ + __analyzer_eval (returns_iv2_t (a, b).arr[0] == a); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_iv2_t (a, b).arr[1] == b); /* { dg-warning "TRUE" } */ +} + +iv4_t returns_iv4_t (int a, iv2_t bc, int d) +{ + iv4_t result = {a, bc.arr[0], bc.arr[1], d}; + return result; +} + +void test_returns_iv4_t (int p, iv2_t qr, int s) +{ + __analyzer_eval (returns_iv4_t (p, qr, s).arr[0] == p); /* { dg-warning "TRUE" } */ + __analyzer_eval (returns_iv4_t (p, qr, s).arr[1] == qr.arr[0]); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (returns_iv4_t (p, qr, s).arr[2] == qr.arr[1]); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (returns_iv4_t (p, qr, s).arr[3] == s); /* { dg-warning "TRUE" } */ + // TODO: ideally the UNKNOWNs would be TRUEs. +} + +void copies_iv2_t (int *p, iv2_t xy) +{ + __builtin_memcpy (p, &xy, sizeof (xy)); +} + +void test_copies_iv2_t (iv2_t ab, iv2_t cd) +{ + iv4_t t; + copies_iv2_t (&t.arr[0], ab); + copies_iv2_t (&t.arr[2], cd); + __analyzer_eval (t.arr[0] = ab.arr[0]); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (t.arr[1] = ab.arr[1]); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (t.arr[2] = cd.arr[0]); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (t.arr[3] = cd.arr[1]); /* { dg-warning "UNKNOWN" } */ + // TODO: ideally the UNKNOWNs would be TRUEs. +} + +void partially_inits (int *p, int v) +{ + p[1] = v; +} + +void test_partially_inits (int x) +{ + int arr[2]; + partially_inits (arr, x); + partially_inits (arr, x); + + __analyzer_eval (arr[0]); /* { dg-warning "UNKNOWN" "eval" } */ + /* { dg-warning "use of uninitialized value 'arr\\\[0\\\]'" "uninit" { target *-*-* } .-1 } */ + + __analyzer_eval (arr[1] == x); /* { dg-warning "UNKNOWN" "eval" } */ + /* { dg-bogus "use of uninitialized value 'arr\\\[1\\\]'" "uninit" { xfail *-*-* } .-1 } */ + // TODO(xfail), and eval should be "TRUE" +} + +int uses_switch (int i) +{ + switch (i) + { + case 0: + return 42; + case 1: + return 17; + default: + return i * 2; + } +} + +void test_uses_switch (int x) +{ + __analyzer_eval (uses_switch (0) == 42); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (uses_switch (1) == 17); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (uses_switch (2) == x * 2); /* { dg-warning "UNKNOWN" } */ + // TODO: ideally the UNKNOWNs would be TRUEs. +} + +int *returns_ptr_to_first_field (struct coord *p) +{ + return &p->x; +} + +void test_returns_ptr_to_first_field (struct coord *q) +{ + __analyzer_eval (returns_ptr_to_first_field (q) == (int *)q); /* { dg-warning "UNKNOWN" } */ + __analyzer_eval (returns_ptr_to_first_field (q) == (int *)q); /* { dg-warning "UNKNOWN" } */ + // TODO: ideally the UNKNOWNs would be TRUEs. +} diff --git a/gcc/testsuite/gcc.dg/analyzer/call-summaries-3.c b/gcc/testsuite/gcc.dg/analyzer/call-summaries-3.c new file mode 100644 index 0000000..d63eb0c --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/call-summaries-3.c @@ -0,0 +1,29 @@ +/* { dg-additional-options "-fanalyzer-call-summaries --param analyzer-min-snodes-for-call-summary=0 -fno-analyzer-state-merge" } */ + +/* There need to be at least two calls to a function for the + call-summarization code to be used. + TODO: add some kind of test that summarization *was* used. */ + +#include "analyzer-decls.h" + +/* With state merging disabled, we get two summaries here. */ + +int two_outcomes (int flag, int x, int y) +{ + if (flag) + return x; + else + return y; +} + +void test_two_outcomes (int outer_flag, int a, int b) +{ + int r; + __analyzer_eval (two_outcomes (1, a, b) == a); /* { dg-warning "TRUE" } */ + __analyzer_eval (two_outcomes (0, a, b) == b); /* { dg-warning "TRUE" } */ + r = two_outcomes (outer_flag, a, b); + if (outer_flag) + __analyzer_eval (r == a); /* { dg-warning "TRUE" } */ + else + __analyzer_eval (r == b); /* { dg-warning "TRUE" } */ +} diff --git a/gcc/testsuite/gcc.dg/analyzer/call-summaries-asm-x86.c b/gcc/testsuite/gcc.dg/analyzer/call-summaries-asm-x86.c new file mode 100644 index 0000000..cc23283 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/call-summaries-asm-x86.c @@ -0,0 +1,20 @@ +/* { dg-do compile { target x86_64-*-* } } */ +/* { dg-additional-options "-fanalyzer-call-summaries --param analyzer-min-snodes-for-call-summary=0" } */ + +#include "analyzer-decls.h" + +int returns_asm_value (void) +{ + int dst; + asm ("mov 42, %0" + : "=r" (dst)); + return dst; +} + +void test_returns_asm_value (void) +{ + int a, b; + a = returns_asm_value (); + b = returns_asm_value (); + __analyzer_eval (a == b); /* { dg-warning "TRUE" } */ +} diff --git a/gcc/testsuite/gcc.dg/analyzer/call-summaries-malloc.c b/gcc/testsuite/gcc.dg/analyzer/call-summaries-malloc.c new file mode 100644 index 0000000..87173a0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/call-summaries-malloc.c @@ -0,0 +1,80 @@ +/* { dg-additional-options "-fanalyzer-call-summaries --param analyzer-min-snodes-for-call-summary=0" } */ + +/* There need to be at least two calls to a function for the + call-summarization code to be used. + TODO: add some kind of test that summarization *was* used. */ + +#include <stdlib.h> +#include <string.h> +#include "analyzer-decls.h" + +int *malloc_int (int i) +{ + int *res = malloc (sizeof (int)); + if (!res) + return NULL; + *res = i; + return res; +} + +void test_malloc_int (int x) +{ + int *p, *q; + + p = malloc_int (42); + if (p) + __analyzer_eval (*p == 42); /* { dg-warning "TRUE" } */ + free (p); + + q = malloc_int (x); + if (q) + __analyzer_eval (*q == x); /* { dg-warning "TRUE" } */ + free (q); +} + +void test_leak (int x) +{ + int *p = malloc_int (x); /* { dg-message "when 'malloc_int' returns pointer to heap-allocated buffer" } */ +} /* { dg-message "leak of 'p'" } */ + +void *wrapped_malloc (size_t sz) +{ + return malloc (sz); +} + +void wrapped_free (void *p) +{ + free (p); +} + +void test_wrapped_malloc_and_free (size_t sz) +{ + void *p = wrapped_malloc (100); + void *q = wrapped_malloc (sz); + __analyzer_dump_capacity (p); /* { dg-warning "capacity: '\\(\[^\n\r\]*\\)100'" } */ + __analyzer_dump_capacity (q); /* { dg-warning "capacity: 'INIT_VAL\\(sz_\[^\n\r\]*\\)'" } */ + wrapped_free (p); + wrapped_free (q); +} + +void test_use_after_free (void) +{ + // TODO +} + +void test_use_without_check (size_t sz) +{ + char *buf = wrapped_malloc (sz); /* { dg-message "this call could return NULL" } */ + memset (buf, 'x', sz); /* { dg-warning "use of possibly-NULL 'buf' where non-null expected" } */ + wrapped_free (buf); +} + +void test_out_of_bounds (size_t sz) +{ + char *buf = wrapped_malloc (sz); + if (!buf) + return; + memset (buf, 'x', sz); + buf[sz] = '\0'; /* { dg-warning "heap-based buffer overflow" } */ + wrapped_free (buf); +} diff --git a/gcc/testsuite/gcc.dg/analyzer/call-summaries-pr107072.c b/gcc/testsuite/gcc.dg/analyzer/call-summaries-pr107072.c new file mode 100644 index 0000000..1e689b3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/call-summaries-pr107072.c @@ -0,0 +1,90 @@ +/* { dg-additional-options "-fanalyzer-call-summaries --param analyzer-min-snodes-for-call-summary=0" } */ + +/* There need to be at least two calls to a function for the + call-summarization code to be used. + TODO: add some kind of test that summarization *was* used. */ + +/* Reduced from an example in Emacs in which string_char_and_length + was being incorrectly summarized, failing to see the write to *length. */ + +typedef long int ptrdiff_t; +typedef struct Lisp_X *Lisp_Word; +typedef Lisp_Word Lisp_Object; +extern _Bool STRING_MULTIBYTE(Lisp_Object str); +extern unsigned char *SDATA(Lisp_Object string); +enum { MAX_2_BYTE_CHAR = 0x7FF }; +enum { MAX_3_BYTE_CHAR = 0xFFFF }; +enum { MAX_4_BYTE_CHAR = 0x1FFFFF }; +enum { MAX_5_BYTE_CHAR = 0x3FFF7F }; +extern int make_char_multibyte(int c); +static inline int string_char_and_length(unsigned char const *p, int *length) { + int c = p[0]; + if (!(c & 0x80)) { + *length = 1; + return c; + } + ((0xC0 <= c) ? (void)0 : __builtin_unreachable()); + int d = (c << 6) + p[1] - ((0xC0 << 6) + 0x80); + if (!(c & 0x20)) { + *length = 2; + return d + (c < 0xC2 ? 0x3FFF80 : 0); + } + d = (d << 6) + p[2] - ((0x20 << 12) + 0x80); + if (!(c & 0x10)) { + *length = 3; + ((MAX_2_BYTE_CHAR < d && d <= MAX_3_BYTE_CHAR) + ? (void)0 + : __builtin_unreachable()); + return d; + } + d = (d << 6) + p[3] - ((0x10 << 18) + 0x80); + if (!(c & 0x08)) { + *length = 4; + ((MAX_3_BYTE_CHAR < d && d <= MAX_4_BYTE_CHAR) + ? (void)0 + : __builtin_unreachable()); + return d; + } + d = (d << 6) + p[4] - ((0x08 << 24) + 0x80); + *length = 5; + ((MAX_4_BYTE_CHAR < d && d <= MAX_5_BYTE_CHAR) + ? (void)0 + : __builtin_unreachable()); + return d; +} +int fetch_string_char_advance(Lisp_Object string, + ptrdiff_t *charidx, + ptrdiff_t *byteidx) { + int output; + ptrdiff_t b = *byteidx; + unsigned char *chp = SDATA(string) + b; + if (STRING_MULTIBYTE(string)) { + int chlen; + output = string_char_and_length(chp, &chlen); + b += chlen; + } else { + output = *chp; + b++; + } + (*charidx)++; + *byteidx = b; + return output; +} +int fetch_string_char_as_multibyte_advance(Lisp_Object string, + ptrdiff_t *charidx, + ptrdiff_t *byteidx) { + int output; + ptrdiff_t b = *byteidx; + unsigned char *chp = SDATA(string) + b; + if (STRING_MULTIBYTE(string)) { + int chlen; + output = string_char_and_length(chp, &chlen); + b += chlen; + } else { + output = make_char_multibyte(*chp); + b++; + } + (*charidx)++; + *byteidx = b; + return output; +} |