diff options
-rw-r--r-- | gcc/ChangeLog | 13 | ||||
-rw-r--r-- | gcc/Makefile.in | 5 | ||||
-rw-r--r-- | gcc/asan.c | 404 | ||||
-rw-r--r-- | gcc/asan.h | 26 | ||||
-rw-r--r-- | gcc/common.opt | 4 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 13 | ||||
-rw-r--r-- | gcc/passes.c | 1 | ||||
-rw-r--r-- | gcc/toplev.c | 5 | ||||
-rw-r--r-- | gcc/tree-pass.h | 1 |
9 files changed, 469 insertions, 3 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d815477..53dfd98 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2012-11-12 Wei Mi <wmi@google.com> + Diego Novillo <dnovillo@google.com> + Dodji Seketeli <dodji@redhat.com> + + * Makefile.in: Add asan.c and its dependencies. + * common.opt: Add -faddress-sanitizer option. + * invoke.texi: Document the new flag. + * passes.c: Add the asan pass. + * toplev.c (compile_file): Call asan_finish_file. + * asan.c: New file. + * asan.h: New file. + * tree-pass.h: Declare pass_asan. + 2012-11-12 Tobias Burnus <burnus@net-b.de> * diagnostic.c (diagnostic_append_note): Also call va_end when diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 24791a4..dde9b50 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1349,6 +1349,7 @@ OBJS = \ tracer.o \ trans-mem.o \ tree-affine.o \ + asan.o \ tree-call-cdce.o \ tree-cfg.o \ tree-cfgcleanup.o \ @@ -2207,6 +2208,10 @@ stor-layout.o : stor-layout.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(TREE_H) $(PARAMS_H) $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) $(RTL_H) \ $(GGC_H) $(TM_P_H) $(TARGET_H) langhooks.h $(REGS_H) gt-stor-layout.h \ $(DIAGNOSTIC_CORE_H) $(CGRAPH_H) $(TREE_INLINE_H) $(TREE_DUMP_H) $(GIMPLE_H) +asan.o : asan.c asan.h $(CONFIG_H) pointer-set.h \ + $(SYSTEM_H) $(TREE_H) $(GIMPLE_H) \ + output.h $(DIAGNOSTIC_H) coretypes.h $(TREE_DUMP_H) $(FLAGS_H) \ + tree-pretty-print.h tree-ssa-tail-merge.o: tree-ssa-tail-merge.c \ $(SYSTEM_H) $(CONFIG_H) coretypes.h $(TM_H) $(BITMAP_H) \ $(FLAGS_H) $(TM_P_H) $(BASIC_BLOCK_H) \ diff --git a/gcc/asan.c b/gcc/asan.c new file mode 100644 index 0000000..4b07c9646 --- /dev/null +++ b/gcc/asan.c @@ -0,0 +1,404 @@ +/* AddressSanitizer, a fast memory error detector. + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Kostya Serebryany <kcc@google.com> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "tm_p.h" +#include "basic-block.h" +#include "flags.h" +#include "function.h" +#include "tree-inline.h" +#include "gimple.h" +#include "tree-iterator.h" +#include "tree-flow.h" +#include "tree-dump.h" +#include "tree-pass.h" +#include "diagnostic.h" +#include "demangle.h" +#include "langhooks.h" +#include "ggc.h" +#include "cgraph.h" +#include "gimple.h" +#include "asan.h" +#include "gimple-pretty-print.h" + +/* + AddressSanitizer finds out-of-bounds and use-after-free bugs + with <2x slowdown on average. + + The tool consists of two parts: + instrumentation module (this file) and a run-time library. + The instrumentation module adds a run-time check before every memory insn. + For a 8- or 16- byte load accessing address X: + ShadowAddr = (X >> 3) + Offset + ShadowValue = *(char*)ShadowAddr; // *(short*) for 16-byte access. + if (ShadowValue) + __asan_report_load8(X); + For a load of N bytes (N=1, 2 or 4) from address X: + ShadowAddr = (X >> 3) + Offset + ShadowValue = *(char*)ShadowAddr; + if (ShadowValue) + if ((X & 7) + N - 1 > ShadowValue) + __asan_report_loadN(X); + Stores are instrumented similarly, but using __asan_report_storeN functions. + A call too __asan_init() is inserted to the list of module CTORs. + + The run-time library redefines malloc (so that redzone are inserted around + the allocated memory) and free (so that reuse of free-ed memory is delayed), + provides __asan_report* and __asan_init functions. + + Read more: + http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm + + Future work: + The current implementation supports only detection of out-of-bounds and + use-after-free bugs in heap. + In order to support out-of-bounds for stack and globals we will need + to create redzones for stack and global object and poison them. +*/ + +/* The shadow address is computed as (X>>asan_scale) + (1<<asan_offset_log). + We may want to add command line flags to change these values. */ + +static const int asan_scale = 3; +static const int asan_offset_log_32 = 29; +static const int asan_offset_log_64 = 44; +static int asan_offset_log; + + +/* Construct a function tree for __asan_report_{load,store}{1,2,4,8,16}. + IS_STORE is either 1 (for a store) or 0 (for a load). + SIZE_IN_BYTES is one of 1, 2, 4, 8, 16. */ + +static tree +report_error_func (int is_store, int size_in_bytes) +{ + tree fn_type; + tree def; + char name[100]; + + sprintf (name, "__asan_report_%s%d\n", + is_store ? "store" : "load", size_in_bytes); + fn_type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); + def = build_fn_decl (name, fn_type); + TREE_NOTHROW (def) = 1; + TREE_THIS_VOLATILE (def) = 1; /* Attribute noreturn. Surprise! */ + DECL_ATTRIBUTES (def) = tree_cons (get_identifier ("leaf"), + NULL, DECL_ATTRIBUTES (def)); + DECL_ASSEMBLER_NAME (def); + return def; +} + +/* Construct a function tree for __asan_init(). */ + +static tree +asan_init_func (void) +{ + tree fn_type; + tree def; + + fn_type = build_function_type_list (void_type_node, NULL_TREE); + def = build_fn_decl ("__asan_init", fn_type); + TREE_NOTHROW (def) = 1; + DECL_ASSEMBLER_NAME (def); + return def; +} + + +/* Instrument the memory access instruction BASE. + Insert new statements before ITER. + LOCATION is source code location. + IS_STORE is either 1 (for a store) or 0 (for a load). + SIZE_IN_BYTES is one of 1, 2, 4, 8, 16. */ + +static void +build_check_stmt (tree base, + gimple_stmt_iterator *iter, + location_t location, int is_store, int size_in_bytes) +{ + gimple_stmt_iterator gsi; + basic_block cond_bb, then_bb, join_bb; + edge e; + tree cond, t, u; + tree base_addr; + tree shadow_value; + gimple g; + gimple_seq seq, stmts; + tree shadow_type = size_in_bytes == 16 ? + short_integer_type_node : char_type_node; + tree shadow_ptr_type = build_pointer_type (shadow_type); + tree uintptr_type = lang_hooks.types.type_for_mode (ptr_mode, + /*unsignedp=*/true); + + /* We first need to split the current basic block, and start altering + the CFG. This allows us to insert the statements we're about to + construct into the right basic blocks. */ + + cond_bb = gimple_bb (gsi_stmt (*iter)); + gsi = *iter; + gsi_prev (&gsi); + if (!gsi_end_p (gsi)) + e = split_block (cond_bb, gsi_stmt (gsi)); + else + e = split_block_after_labels (cond_bb); + cond_bb = e->src; + join_bb = e->dest; + + /* A recap at this point: join_bb is the basic block at whose head + is the gimple statement for which this check expression is being + built. cond_bb is the (possibly new, synthetic) basic block the + end of which will contain the cache-lookup code, and a + conditional that jumps to the cache-miss code or, much more + likely, over to join_bb. */ + + /* Create the bb that contains the crash block. */ + then_bb = create_empty_bb (cond_bb); + make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); + make_single_succ_edge (then_bb, join_bb, EDGE_FALLTHRU); + + /* Mark the pseudo-fallthrough edge from cond_bb to join_bb. */ + e = find_edge (cond_bb, join_bb); + e->flags = EDGE_FALSE_VALUE; + e->count = cond_bb->count; + e->probability = REG_BR_PROB_BASE; + + /* Update dominance info. Note that bb_join's data was + updated by split_block. */ + if (dom_info_available_p (CDI_DOMINATORS)) + { + set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); + set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb); + } + + base_addr = create_tmp_reg (uintptr_type, "__asan_base_addr"); + + seq = NULL; + t = fold_convert_loc (location, uintptr_type, + unshare_expr (base)); + t = force_gimple_operand (t, &stmts, false, NULL_TREE); + gimple_seq_add_seq (&seq, stmts); + g = gimple_build_assign (base_addr, t); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + + /* Build (base_addr >> asan_scale) + (1 << asan_offset_log). */ + + t = build2 (RSHIFT_EXPR, uintptr_type, base_addr, + build_int_cst (uintptr_type, asan_scale)); + t = build2 (PLUS_EXPR, uintptr_type, t, + build2 (LSHIFT_EXPR, uintptr_type, + build_int_cst (uintptr_type, 1), + build_int_cst (uintptr_type, asan_offset_log) + )); + t = build1 (INDIRECT_REF, shadow_type, + build1 (VIEW_CONVERT_EXPR, shadow_ptr_type, t)); + t = force_gimple_operand (t, &stmts, false, NULL_TREE); + gimple_seq_add_seq (&seq, stmts); + shadow_value = create_tmp_reg (shadow_type, "__asan_shadow"); + g = gimple_build_assign (shadow_value, t); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + t = build2 (NE_EXPR, boolean_type_node, shadow_value, + build_int_cst (shadow_type, 0)); + if (size_in_bytes < 8) + { + + /* Slow path for 1-, 2- and 4- byte accesses. + Build ((base_addr & 7) + (size_in_bytes - 1)) >= shadow_value. */ + + u = build2 (BIT_AND_EXPR, uintptr_type, + base_addr, + build_int_cst (uintptr_type, 7)); + u = build1 (CONVERT_EXPR, shadow_type, u); + u = build2 (PLUS_EXPR, shadow_type, u, + build_int_cst (shadow_type, size_in_bytes - 1)); + u = build2 (GE_EXPR, uintptr_type, u, shadow_value); + } + else + u = build_int_cst (boolean_type_node, 1); + t = build2 (TRUTH_AND_EXPR, boolean_type_node, t, u); + t = force_gimple_operand (t, &stmts, false, NULL_TREE); + gimple_seq_add_seq (&seq, stmts); + cond = create_tmp_reg (boolean_type_node, "__asan_crash_cond"); + g = gimple_build_assign (cond, t); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + g = gimple_build_cond (NE_EXPR, cond, boolean_false_node, NULL_TREE, + NULL_TREE); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + + /* Generate call to the run-time library (e.g. __asan_report_load8). */ + + gsi = gsi_last_bb (cond_bb); + gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); + seq = NULL; + g = gimple_build_call (report_error_func (is_store, size_in_bytes), + 1, base_addr); + gimple_seq_add_stmt (&seq, g); + + /* Insert the check code in the THEN block. */ + + gsi = gsi_start_bb (then_bb); + gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); + + *iter = gsi_start_bb (join_bb); +} + +/* If T represents a memory access, add instrumentation code before ITER. + LOCATION is source code location. + IS_STORE is either 1 (for a store) or 0 (for a load). */ + +static void +instrument_derefs (gimple_stmt_iterator *iter, tree t, + location_t location, int is_store) +{ + tree type, base; + int size_in_bytes; + + type = TREE_TYPE (t); + if (type == error_mark_node) + return; + switch (TREE_CODE (t)) + { + case ARRAY_REF: + case COMPONENT_REF: + case INDIRECT_REF: + case MEM_REF: + break; + default: + return; + } + size_in_bytes = tree_low_cst (TYPE_SIZE (type), 0) / BITS_PER_UNIT; + if (size_in_bytes != 1 && size_in_bytes != 2 && + size_in_bytes != 4 && size_in_bytes != 8 && size_in_bytes != 16) + return; + { + /* For now just avoid instrumenting bit field acceses. + Fixing it is doable, but expected to be messy. */ + + HOST_WIDE_INT bitsize, bitpos; + tree offset; + enum machine_mode mode; + int volatilep = 0, unsignedp = 0; + get_inner_reference (t, &bitsize, &bitpos, &offset, + &mode, &unsignedp, &volatilep, false); + if (bitpos != 0 || bitsize != size_in_bytes * BITS_PER_UNIT) + return; + } + + base = build_addr (t, current_function_decl); + build_check_stmt (base, iter, location, is_store, size_in_bytes); +} + +/* asan: this looks too complex. Can this be done simpler? */ +/* Transform + 1) Memory references. + 2) BUILTIN_ALLOCA calls. +*/ + +static void +transform_statements (void) +{ + basic_block bb; + gimple_stmt_iterator i; + int saved_last_basic_block = last_basic_block; + enum gimple_rhs_class grhs_class; + + FOR_EACH_BB (bb) + { + if (bb->index >= saved_last_basic_block) continue; + for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i)) + { + gimple s = gsi_stmt (i); + if (gimple_code (s) != GIMPLE_ASSIGN) + continue; + instrument_derefs (&i, gimple_assign_lhs (s), + gimple_location (s), 1); + instrument_derefs (&i, gimple_assign_rhs1 (s), + gimple_location (s), 0); + grhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (s)); + if (grhs_class == GIMPLE_BINARY_RHS) + instrument_derefs (&i, gimple_assign_rhs2 (s), + gimple_location (s), 0); + } + } +} + +/* Module-level instrumentation. + - Insert __asan_init() into the list of CTORs. + - TODO: insert redzones around globals. + */ + +void +asan_finish_file (void) +{ + tree ctor_statements = NULL_TREE; + append_to_statement_list (build_call_expr (asan_init_func (), 0), + &ctor_statements); + cgraph_build_static_cdtor ('I', ctor_statements, + MAX_RESERVED_INIT_PRIORITY - 1); +} + +/* Instrument the current function. */ + +static unsigned int +asan_instrument (void) +{ + struct gimplify_ctx gctx; + tree uintptr_type = lang_hooks.types.type_for_mode (ptr_mode, true); + int is_64 = tree_low_cst (TYPE_SIZE (uintptr_type), 0) == 64; + asan_offset_log = is_64 ? asan_offset_log_64 : asan_offset_log_32; + push_gimplify_context (&gctx); + transform_statements (); + pop_gimplify_context (NULL); + return 0; +} + +static bool +gate_asan (void) +{ + return flag_asan != 0; +} + +struct gimple_opt_pass pass_asan = +{ + { + GIMPLE_PASS, + "asan", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + gate_asan, /* gate */ + asan_instrument, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_NONE, /* tv_id */ + PROP_ssa | PROP_cfg | PROP_gimple_leh,/* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_flow | TODO_verify_stmts + | TODO_update_ssa /* todo_flags_finish */ + } +}; diff --git a/gcc/asan.h b/gcc/asan.h new file mode 100644 index 0000000..590cf35 --- /dev/null +++ b/gcc/asan.h @@ -0,0 +1,26 @@ +/* AddressSanitizer, a fast memory error detector. + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Kostya Serebryany <kcc@google.com> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef TREE_ASAN +#define TREE_ASAN + +extern void asan_finish_file(void); + +#endif /* TREE_ASAN */ diff --git a/gcc/common.opt b/gcc/common.opt index f947a72..6088d1a 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -837,6 +837,10 @@ fargument-noalias-anything Common Ignore Does nothing. Preserved for backward compatibility. +faddress-sanitizer +Common Report Var(flag_asan) +Enable AddressSanitizer, a memory error detector + fasynchronous-unwind-tables Common Report Var(flag_asynchronous_unwind_tables) Optimization Generate unwind tables that are exact at each instruction boundary diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 9477ffc..ef2e69a 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -354,9 +354,10 @@ Objective-C and Objective-C++ Dialects}. @item Optimization Options @xref{Optimize Options,,Options that Control Optimization}. @gccoptlist{-falign-functions[=@var{n}] -falign-jumps[=@var{n}] @gol --falign-labels[=@var{n}] -falign-loops[=@var{n}] -fassociative-math @gol --fauto-inc-dec -fbranch-probabilities -fbranch-target-load-optimize @gol --fbranch-target-load-optimize2 -fbtr-bb-exclusive -fcaller-saves @gol +-falign-labels[=@var{n}] -falign-loops[=@var{n}] -faddress-sanitizer @gol +--fassociative-math fauto-inc-dec -fbranch-probabilities @gol +--fbranch-target-load-optimize fbranch-target-load-optimize2 @gol +--fbtr-bb-exclusive -fcaller-saves @gol -fcheck-data-deps -fcombine-stack-adjustments -fconserve-stack @gol -fcompare-elim -fcprop-registers -fcrossjumping @gol -fcse-follow-jumps -fcse-skip-blocks -fcx-fortran-rules @gol @@ -6848,6 +6849,12 @@ assumptions based on that. The default is @option{-fzero-initialized-in-bss}. +@item -faddress-sanitizer +Enable AddressSanitizer, a fast memory error detector. +Memory access instructions will be instrumented to detect +out-of-bounds and use-after-free bugs. So far only heap bugs will be detected. +See @uref{http://code.google.com/p/address-sanitizer/} for more details. + @item -fmudflap -fmudflapth -fmudflapir @opindex fmudflap @opindex fmudflapth diff --git a/gcc/passes.c b/gcc/passes.c index 67aae52..66a2f74 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -1456,6 +1456,7 @@ init_optimization_passes (void) NEXT_PASS (pass_split_crit_edges); NEXT_PASS (pass_pre); NEXT_PASS (pass_sink_code); + NEXT_PASS (pass_asan); NEXT_PASS (pass_tree_loop); { struct opt_pass **p = &pass_tree_loop.pass.sub; diff --git a/gcc/toplev.c b/gcc/toplev.c index 5cbb364..3ca0736 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -72,6 +72,7 @@ along with GCC; see the file COPYING3. If not see #include "value-prof.h" #include "alloc-pool.h" #include "tree-mudflap.h" +#include "asan.h" #include "gimple.h" #include "tree-ssa-alias.h" #include "plugin.h" @@ -570,6 +571,10 @@ compile_file (void) if (flag_mudflap) mudflap_finish_file (); + /* File-scope initialization for AddressSanitizer. */ + if (flag_asan) + asan_finish_file (); + output_shared_constant_pool (); output_object_blocks (); finish_tm_clone_pairs (); diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 09ec531..0e61856 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -259,6 +259,7 @@ struct register_pass_info extern struct gimple_opt_pass pass_mudflap_1; extern struct gimple_opt_pass pass_mudflap_2; +extern struct gimple_opt_pass pass_asan; extern struct gimple_opt_pass pass_lower_cf; extern struct gimple_opt_pass pass_refactor_eh; extern struct gimple_opt_pass pass_lower_eh; |