aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog42
-rw-r--r--gcc/Makefile.in4
-rw-r--r--gcc/common.opt4
-rw-r--r--gcc/doc/invoke.texi17
-rw-r--r--gcc/flags.h4
-rw-r--r--gcc/gcov-io.c7
-rw-r--r--gcc/opts.c15
-rw-r--r--gcc/passes.c3
-rw-r--r--gcc/profile.c67
-rw-r--r--gcc/rtl-profile.c34
-rw-r--r--gcc/toplev.c23
-rw-r--r--gcc/tree-profile.c8
-rw-r--r--gcc/value-prof.c395
-rw-r--r--gcc/value-prof.h33
14 files changed, 499 insertions, 157 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e091271..04d4fc45 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,47 @@
2004-09-01 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
+ * Makefile.in (rtl-profile.o, value-prof.o): Add GCC_H dependency.
+ * common.opt (fspeculative-prefetching): New.
+ * flags.h (flag_speculative_prefetching_set): Declare.
+ * gcov-io.c (gcov_write_counter, gcov_read_counter): Allow negative
+ values.
+ * opts.c (flag_speculative_prefetching_set): New variable.
+ (common_handle_option): Handle -fspeculative-prefetching.
+ * passes.c (rest_of_compilation): Ditto.
+ * profile.c (instrument_values, compute_value_histograms, branch_prob):
+ Use vectors instead of arrays.
+ * toplev.c (process_options): Handle -fspeculative-prefetching.
+ * rtl-profile.c: Include ggc.h.
+ (rtl_gen_interval_profiler, rtl_gen_pow2_profiler,
+ rtl_gen_one_value_profiler_no_edge_manipulation,
+ rtl_gen_one_value_profiler, rtl_gen_const_delta_profiler): Type of
+ argument changed.
+ * tree-profile.c (tree_gen_interval_profiler, tree_gen_pow2_profiler,
+ tree_gen_one_value_profiler, tree_gen_const_delta_profiler): Type of
+ argument changed.
+ * value-prof.c: Include ggc.h.
+ (NOPREFETCH_RANGE_MIN, NOPREFETCH_RANGE_MAX): New
+ macros.
+ (insn_prefetch_values_to_profile, find_mem_reference_1,
+ find_mem_reference_2, find_mem_reference, gen_speculative_prefetch,
+ speculative_prefetching_transform): New.
+ (value_profile_transformations): Call speculative_prefetching_transform.
+ (insn_values_to_profile): Call insn_prefetch_values_to_profile.
+ (insn_divmod_values_to_profile, rtl_find_values_to_profile,
+ tree_find_values_to_profile, find_values_to_profile): Use vectors
+ instead of arrays.
+ (free_profiled_values): Removed.
+ * value-prof.h (struct histogram_value): Renamed to
+ struct histogram_value_t.
+ (histogram_value, histogram_values): New types.
+ (find_values_to_profile): Declaration changed.
+ (free_profiled_values): Removed.
+ (struct profile_hooks): Type of argument of the hooks changed to
+ histogram_value.
+ * doc/invoke.texi (-fspeculative-prefetching): Document.
+
+2004-09-01 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
+
PR rtl-optimization/16408
* gcse.c (replace_store_insn): Fix LIBCALL/RETVAL notes.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 40411f6..dd2333c 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1959,10 +1959,10 @@ tree-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
tree-pass.h $(TREE_FLOW_H) $(TIMEVAR_H)
rtl-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(RTL_H) $(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) function.h \
- toplev.h $(BASIC_BLOCK_H) $(COVERAGE_H) $(TREE_FLOW_H) value-prof.h
+ toplev.h $(BASIC_BLOCK_H) $(COVERAGE_H) $(TREE_FLOW_H) value-prof.h $(GGC_H)
value-prof.o : value-prof.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
$(BASIC_BLOCK_H) hard-reg-set.h value-prof.h $(EXPR_H) output.h $(FLAGS_H) \
- $(RECOG_H) insn-config.h $(OPTABS_H) $(REGS_H)
+ $(RECOG_H) insn-config.h $(OPTABS_H) $(REGS_H) $(GGC_H)
loop.o : loop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(FLAGS_H) $(LOOP_H) \
insn-config.h $(REGS_H) hard-reg-set.h $(RECOG_H) $(EXPR_H) \
real.h $(PREDICT_H) $(BASIC_BLOCK_H) function.h $(CFGLOOP_H) \
diff --git a/gcc/common.opt b/gcc/common.opt
index 2f615f7..e745964 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -752,6 +752,10 @@ fsingle-precision-constant
Common Report Var(flag_single_precision_constant)
Convert floating point constants to single precision constants
+fspeculative-prefetching
+Common Report Var(flag_speculative_prefetching)
+Use value profiling for speculative prefetching
+
; Emit code to probe the stack, to help detect stack overflow; also
; may cause large objects to be allocated dynamically.
fstack-check
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 96b0fce..b4efeb4 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -311,7 +311,7 @@ Objective-C and Objective-C++ Dialects}.
-fsched-stalled-insns=@var{n} -sched-stalled-insns-dep=@var{n} @gol
-fsched2-use-superblocks @gol
-fsched2-use-traces -freschedule-modulo-scheduled-loops @gol
--fsignaling-nans -fsingle-precision-constant @gol
+-fsignaling-nans -fsingle-precision-constant -fspeculative-prefetching @gol
-fstrength-reduce -fstrict-aliasing -ftracer -fthread-jumps @gol
-funroll-all-loops -funroll-loops -fpeel-loops @gol
-funswitch-loops -fold-unroll-loops -fold-unroll-all-loops @gol
@@ -5011,6 +5011,21 @@ and actually performs the optimizations based on them.
Currently the optimizations include specialization of division operation
using the knowledge about the value of the denominator.
+@item -fspeculative-prefetching
+@opindex fspeculative-prefetching
+If combined with @option{-fprofile-arcs}, it instructs the compiler to add
+code to gather information about addresses of memory references in the
+program.
+
+With @option{-fbranch-probabilities}, it reads back the data gathered
+and issues prefetch instructions according to them. In addition to the opportunities
+noticed by @option{-fprefetch-loop-arrays}, it also notices more complicated
+memory access patterns -- for example accesses to the data stored in linked
+list whose elements are usually allocated sequentially.
+
+In order to prevent issuing double prefetches, usage of
+@option{-fspeculative-prefetching} implies @option{-fno-prefetch-loop-arrays}.
+
Enabled with @option{-fprofile-generate} and @option{-fprofile-use}.
@item -frename-registers
diff --git a/gcc/flags.h b/gcc/flags.h
index c0fcc3c..fb24035 100644
--- a/gcc/flags.h
+++ b/gcc/flags.h
@@ -257,6 +257,10 @@ extern int flag_remove_unreachable_functions;
/* Nonzero if we should track variables. */
extern int flag_var_tracking;
+/* True if flag_speculative_prefetching was set by user. Used to suppress
+ warning message in case flag was set by -fprofile-{generate,use}. */
+extern bool flag_speculative_prefetching_set;
+
/* A string that's used when a random name is required. NULL means
to make it really random. */
diff --git a/gcc/gcov-io.c b/gcc/gcov-io.c
index 3b4dcd6..7370f51 100644
--- a/gcc/gcov-io.c
+++ b/gcc/gcov-io.c
@@ -268,9 +268,6 @@ gcov_write_counter (gcov_type value)
buffer[1] = (gcov_unsigned_t) (value >> 32);
else
buffer[1] = 0;
-
- if (value < 0)
- gcov_var.error = -1;
}
#endif /* IN_LIBGCOV */
@@ -453,9 +450,7 @@ gcov_read_counter (void)
value |= ((gcov_type) from_file (buffer[1])) << 32;
else if (buffer[1])
gcov_var.error = -1;
-
- if (value < 0)
- gcov_var.error = -1;
+
return value;
}
diff --git a/gcc/opts.c b/gcc/opts.c
index b802bcf..3c49827 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -93,6 +93,7 @@ static const char undocumented_msg[] = N_("This switch lacks documentation");
static bool profile_arc_flag_set, flag_profile_values_set;
static bool flag_unroll_loops_set, flag_tracer_set;
static bool flag_value_profile_transformations_set;
+bool flag_speculative_prefetching_set;
static bool flag_peel_loops_set, flag_branch_probabilities_set;
/* Input file names. */
@@ -830,6 +831,10 @@ common_handle_option (size_t scode, const char *arg, int value)
flag_tracer = value;
if (!flag_value_profile_transformations_set)
flag_value_profile_transformations = value;
+#ifdef HAVE_prefetch
+ if (!flag_speculative_prefetching_set)
+ flag_speculative_prefetching = value;
+#endif
break;
case OPT_fprofile_generate:
@@ -839,6 +844,10 @@ common_handle_option (size_t scode, const char *arg, int value)
flag_profile_values = value;
if (!flag_value_profile_transformations_set)
flag_value_profile_transformations = value;
+#ifdef HAVE_prefetch
+ if (!flag_speculative_prefetching_set)
+ flag_speculative_prefetching = value;
+#endif
break;
case OPT_fprofile_values:
@@ -861,7 +870,11 @@ common_handle_option (size_t scode, const char *arg, int value)
break;
case OPT_fvpt:
- flag_value_profile_transformations_set = value;
+ flag_value_profile_transformations_set = true;
+ break;
+
+ case OPT_fspeculative_prefetching:
+ flag_speculative_prefetching_set = true;
break;
case OPT_frandom_seed:
diff --git a/gcc/passes.c b/gcc/passes.c
index 06c2d89..783e33d 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -1820,7 +1820,8 @@ rest_of_compilation (void)
if (flag_branch_probabilities
&& flag_profile_values
- && flag_value_profile_transformations)
+ && (flag_value_profile_transformations
+ || flag_speculative_prefetching))
rest_of_handle_value_profile_transformations ();
/* Remove the death notes created for vpt. */
diff --git a/gcc/profile.c b/gcc/profile.c
index 002e7a1..2200e76 100644
--- a/gcc/profile.c
+++ b/gcc/profile.c
@@ -119,9 +119,9 @@ static int total_num_branches;
/* Forward declarations. */
static void find_spanning_tree (struct edge_list *);
static unsigned instrument_edges (struct edge_list *);
-static void instrument_values (unsigned, struct histogram_value *);
+static void instrument_values (histogram_values);
static void compute_branch_probabilities (void);
-static void compute_value_histograms (unsigned, struct histogram_value *);
+static void compute_value_histograms (histogram_values);
static gcov_type * get_exec_counts (void);
static basic_block find_group (basic_block);
static void union_groups (basic_block, basic_block);
@@ -166,17 +166,18 @@ instrument_edges (struct edge_list *el)
return num_instr_edges;
}
-/* Add code to measure histograms list of VALUES of length N_VALUES. */
+/* Add code to measure histograms for values in list VALUES. */
static void
-instrument_values (unsigned n_values, struct histogram_value *values)
+instrument_values (histogram_values values)
{
unsigned i, t;
/* Emit code to generate the histograms before the insns. */
- for (i = 0; i < n_values; i++)
+ for (i = 0; i < VEC_length (histogram_value, values); i++)
{
- switch (values[i].type)
+ histogram_value hist = VEC_index (histogram_value, values, i);
+ switch (hist->type)
{
case HIST_TYPE_INTERVAL:
t = GCOV_COUNTER_V_INTERVAL;
@@ -197,25 +198,25 @@ instrument_values (unsigned n_values, struct histogram_value *values)
default:
abort ();
}
- if (!coverage_counter_alloc (t, values[i].n_counters))
+ if (!coverage_counter_alloc (t, hist->n_counters))
continue;
- switch (values[i].type)
+ switch (hist->type)
{
case HIST_TYPE_INTERVAL:
- (profile_hooks->gen_interval_profiler) (values + i, t, 0);
+ (profile_hooks->gen_interval_profiler) (hist, t, 0);
break;
case HIST_TYPE_POW2:
- (profile_hooks->gen_pow2_profiler) (values + i, t, 0);
+ (profile_hooks->gen_pow2_profiler) (hist, t, 0);
break;
case HIST_TYPE_SINGLE_VALUE:
- (profile_hooks->gen_one_value_profiler) (values + i, t, 0);
+ (profile_hooks->gen_one_value_profiler) (hist, t, 0);
break;
case HIST_TYPE_CONST_DELTA:
- (profile_hooks->gen_const_delta_profiler) (values + i, t, 0);
+ (profile_hooks->gen_const_delta_profiler) (hist, t, 0);
break;
default:
@@ -613,22 +614,27 @@ compute_branch_probabilities (void)
free_aux_for_blocks ();
}
-/* Load value histograms for N_VALUES values whose description is stored
- in VALUES array from .da file. */
+/* Load value histograms for the values whose description is stored in the
+ VALUES vector from .da file. */
+
static void
-compute_value_histograms (unsigned n_values, struct histogram_value *values)
+compute_value_histograms (histogram_values values)
{
unsigned i, j, t, any;
unsigned n_histogram_counters[GCOV_N_VALUE_COUNTERS];
gcov_type *histogram_counts[GCOV_N_VALUE_COUNTERS];
gcov_type *act_count[GCOV_N_VALUE_COUNTERS];
gcov_type *aact_count;
+ histogram_value hist;
for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++)
n_histogram_counters[t] = 0;
- for (i = 0; i < n_values; i++)
- n_histogram_counters[(int) (values[i].type)] += values[i].n_counters;
+ for (i = 0; i < VEC_length (histogram_value, values); i++)
+ {
+ hist = VEC_index (histogram_value, values, i);
+ n_histogram_counters[(int) hist->type] += hist->n_counters;
+ }
any = 0;
for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++)
@@ -649,25 +655,27 @@ compute_value_histograms (unsigned n_values, struct histogram_value *values)
if (!any)
return;
- for (i = 0; i < n_values; i++)
+ for (i = 0; i < VEC_length (histogram_value, values); i++)
{
rtx hist_list = NULL_RTX;
- t = (int) (values[i].type);
+
+ hist = VEC_index (histogram_value, values, i);
+ t = (int) hist->type;
/* FIXME: make this work for trees. */
if (!ir_type ())
{
aact_count = act_count[t];
- act_count[t] += values[i].n_counters;
- for (j = values[i].n_counters; j > 0; j--)
+ act_count[t] += hist->n_counters;
+ for (j = hist->n_counters; j > 0; j--)
hist_list = alloc_EXPR_LIST (0, GEN_INT (aact_count[j - 1]),
hist_list);
hist_list = alloc_EXPR_LIST (0,
- copy_rtx ((rtx)values[i].value), hist_list);
- hist_list = alloc_EXPR_LIST (0, GEN_INT (values[i].type), hist_list);
- REG_NOTES ((rtx)values[i].insn) =
+ copy_rtx ((rtx) hist->value), hist_list);
+ hist_list = alloc_EXPR_LIST (0, GEN_INT (hist->type), hist_list);
+ REG_NOTES ((rtx) hist->insn) =
alloc_EXPR_LIST (REG_VALUE_PROFILE, hist_list,
- REG_NOTES ((rtx)values[i].insn));
+ REG_NOTES ((rtx) hist->insn));
}
}
@@ -700,8 +708,7 @@ branch_prob (void)
unsigned num_edges, ignored_edges;
unsigned num_instrumented;
struct edge_list *el;
- unsigned n_values = 0;
- struct histogram_value *values = NULL;
+ histogram_values values = NULL;
total_num_times_called++;
@@ -960,13 +967,13 @@ branch_prob (void)
#undef BB_TO_GCOV_INDEX
if (flag_profile_values)
- find_values_to_profile (&n_values, &values);
+ find_values_to_profile (&values);
if (flag_branch_probabilities)
{
compute_branch_probabilities ();
if (flag_profile_values)
- compute_value_histograms (n_values, values);
+ compute_value_histograms (values);
}
remove_fake_edges ();
@@ -981,7 +988,7 @@ branch_prob (void)
abort ();
if (flag_profile_values)
- instrument_values (n_values, values);
+ instrument_values (values);
/* Commit changes done by instrumentation. */
if (ir_type ())
diff --git a/gcc/rtl-profile.c b/gcc/rtl-profile.c
index a53a004..2d0c69cc 100644
--- a/gcc/rtl-profile.c
+++ b/gcc/rtl-profile.c
@@ -62,6 +62,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "coverage.h"
#include "value-prof.h"
#include "tree.h"
+#include "ggc.h"
/* Output instructions as RTL to increment the edge execution count. */
@@ -93,8 +94,7 @@ rtl_gen_edge_profiler (int edgeno, edge e)
section for counters, BASE is offset of the counter position. */
static void
-rtl_gen_interval_profiler (struct histogram_value *value, unsigned tag,
- unsigned base)
+rtl_gen_interval_profiler (histogram_value value, unsigned tag, unsigned base)
{
unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
@@ -196,8 +196,7 @@ rtl_gen_interval_profiler (struct histogram_value *value, unsigned tag,
section for counters, BASE is offset of the counter position. */
static void
-rtl_gen_pow2_profiler (struct histogram_value *value, unsigned tag,
- unsigned base)
+rtl_gen_pow2_profiler (histogram_value value, unsigned tag, unsigned base)
{
unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
@@ -272,8 +271,8 @@ rtl_gen_pow2_profiler (struct histogram_value *value, unsigned tag,
section for counters, BASE is offset of the counter position. */
static rtx
-rtl_gen_one_value_profiler_no_edge_manipulation (struct histogram_value *value,
- unsigned tag, unsigned base)
+rtl_gen_one_value_profiler_no_edge_manipulation (histogram_value value,
+ unsigned tag, unsigned base)
{
unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
@@ -351,8 +350,7 @@ rtl_gen_one_value_profiler_no_edge_manipulation (struct histogram_value *value,
section for counters, BASE is offset of the counter position. */
static void
-rtl_gen_one_value_profiler (struct histogram_value *value, unsigned tag,
- unsigned base)
+rtl_gen_one_value_profiler (histogram_value value, unsigned tag, unsigned base)
{
edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
PREV_INSN ((rtx)value->insn));
@@ -368,10 +366,9 @@ rtl_gen_one_value_profiler (struct histogram_value *value, unsigned tag,
section for counters, BASE is offset of the counter position. */
static void
-rtl_gen_const_delta_profiler (struct histogram_value *value, unsigned tag,
- unsigned base)
+rtl_gen_const_delta_profiler (histogram_value value, unsigned tag, unsigned base)
{
- struct histogram_value one_value_delta;
+ histogram_value one_value_delta;
unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
rtx stored_value_ref, stored_value, tmp, uval;
@@ -393,13 +390,14 @@ rtl_gen_const_delta_profiler (struct histogram_value *value, unsigned tag,
copy_rtx (uval), copy_rtx (stored_value),
NULL_RTX, 0, OPTAB_WIDEN);
- one_value_delta.value = tmp;
- one_value_delta.mode = mode;
- one_value_delta.seq = NULL_RTX;
- one_value_delta.insn = value->insn;
- one_value_delta.type = HIST_TYPE_SINGLE_VALUE;
- emit_insn (rtl_gen_one_value_profiler_no_edge_manipulation (&one_value_delta,
- tag, base + 1));
+ one_value_delta = ggc_alloc (sizeof (*one_value_delta));
+ one_value_delta->value = tmp;
+ one_value_delta->mode = mode;
+ one_value_delta->seq = NULL_RTX;
+ one_value_delta->insn = value->insn;
+ one_value_delta->type = HIST_TYPE_SINGLE_VALUE;
+ emit_insn (rtl_gen_one_value_profiler_no_edge_manipulation (one_value_delta,
+ tag, base + 1));
emit_move_insn (copy_rtx (stored_value), uval);
sequence = get_insns ();
end_sequence ();
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 34db3bd..853f170 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -1730,6 +1730,17 @@ process_options (void)
if (flag_value_profile_transformations)
flag_profile_values = 1;
+ /* Speculative prefetching implies the value profiling. We also switch off
+ the prefetching in the loop optimizer, so that we do not emit double
+ prefetches. TODO -- we should teach these two to cooperate; the loop
+ based prefetching may sometimes do a better job, especially in connection
+ with reuse analysis. */
+ if (flag_speculative_prefetching)
+ {
+ flag_profile_values = 1;
+ flag_prefetch_loop_arrays = 0;
+ }
+
/* Warn about options that are not supported on this machine. */
#ifndef INSN_SCHEDULING
if (flag_schedule_insns || flag_schedule_insns_after_reload)
@@ -1898,12 +1909,24 @@ process_options (void)
warning ("-fprefetch-loop-arrays not supported for this target");
flag_prefetch_loop_arrays = 0;
}
+ if (flag_speculative_prefetching)
+ {
+ if (flag_speculative_prefetching_set)
+ warning ("-fspeculative-prefetching not supported for this target");
+ flag_speculative_prefetching = 0;
+ }
#else
if (flag_prefetch_loop_arrays && !HAVE_prefetch)
{
warning ("-fprefetch-loop-arrays not supported for this target (try -march switches)");
flag_prefetch_loop_arrays = 0;
}
+ if (flag_speculative_prefetching && !HAVE_prefetch)
+ {
+ if (flag_speculative_prefetching_set)
+ warning ("-fspeculative-prefetching not supported for this target (try -march switches)");
+ flag_speculative_prefetching = 0;
+ }
#endif
/* This combination of options isn't handled for i386 targets and doesn't
diff --git a/gcc/tree-profile.c b/gcc/tree-profile.c
index 1a7b744..7f18415 100644
--- a/gcc/tree-profile.c
+++ b/gcc/tree-profile.c
@@ -94,7 +94,7 @@ tree_gen_edge_profiler (int edgeno, edge e)
tag of the section for counters, BASE is offset of the counter position. */
static void
-tree_gen_interval_profiler (struct histogram_value *value ATTRIBUTE_UNUSED,
+tree_gen_interval_profiler (histogram_value value ATTRIBUTE_UNUSED,
unsigned tag ATTRIBUTE_UNUSED,
unsigned base ATTRIBUTE_UNUSED)
{
@@ -107,7 +107,7 @@ tree_gen_interval_profiler (struct histogram_value *value ATTRIBUTE_UNUSED,
of the section for counters, BASE is offset of the counter position. */
static void
-tree_gen_pow2_profiler (struct histogram_value *value ATTRIBUTE_UNUSED,
+tree_gen_pow2_profiler (histogram_value value ATTRIBUTE_UNUSED,
unsigned tag ATTRIBUTE_UNUSED,
unsigned base ATTRIBUTE_UNUSED)
{
@@ -120,7 +120,7 @@ tree_gen_pow2_profiler (struct histogram_value *value ATTRIBUTE_UNUSED,
section for counters, BASE is offset of the counter position. */
static void
-tree_gen_one_value_profiler (struct histogram_value *value ATTRIBUTE_UNUSED,
+tree_gen_one_value_profiler (histogram_value value ATTRIBUTE_UNUSED,
unsigned tag ATTRIBUTE_UNUSED,
unsigned base ATTRIBUTE_UNUSED)
{
@@ -134,7 +134,7 @@ tree_gen_one_value_profiler (struct histogram_value *value ATTRIBUTE_UNUSED,
section for counters, BASE is offset of the counter position. */
static void
-tree_gen_const_delta_profiler (struct histogram_value *value ATTRIBUTE_UNUSED,
+tree_gen_const_delta_profiler (histogram_value value ATTRIBUTE_UNUSED,
unsigned tag ATTRIBUTE_UNUSED,
unsigned base ATTRIBUTE_UNUSED)
{
diff --git a/gcc/value-prof.c b/gcc/value-prof.c
index 17f78f6..a01c1c9 100644
--- a/gcc/value-prof.c
+++ b/gcc/value-prof.c
@@ -33,11 +33,20 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "recog.h"
#include "optabs.h"
#include "regs.h"
+#include "ggc.h"
static struct value_prof_hooks *value_prof_hooks;
-/* In this file value profile based optimizations will be placed (none are
- here just now, but they are hopefully coming soon).
+/* In this file value profile based optimizations are placed. Currently the
+ following optimizations are implemented (for more detailed descriptions
+ see comments at value_profile_transformations):
+
+ 1) Division/modulo specialisation. Provided that we can determine that the
+ operands of the division have some special properties, we may use it to
+ produce more effective code.
+ 2) Speculative prefetching. If we are able to determine that the difference
+ between addresses accessed by a memory reference is usually constant, we
+ may add the prefetch instructions.
Every such optimization should add its requirements for profiled values to
insn_values_to_profile function. This function is called from branch_prob
@@ -52,35 +61,51 @@ static struct value_prof_hooks *value_prof_hooks;
-- the expression that is profiled
-- list of counters starting from the first one. */
-static void insn_divmod_values_to_profile (rtx, unsigned *,
- struct histogram_value **);
-static void insn_values_to_profile (rtx, unsigned *, struct histogram_value **);
+/* For speculative prefetching, the range in which we do not prefetch (because
+ we assume that it will be in cache anyway). The asymmetry between min and
+ max range is trying to reflect the fact that the sequential prefetching
+ of the data is commonly done directly by hardware. Nevertheless, these
+ values are just a guess and should of course be target-specific. */
+
+#ifndef NOPREFETCH_RANGE_MIN
+#define NOPREFETCH_RANGE_MIN (-16)
+#endif
+#ifndef NOPREFETCH_RANGE_MAX
+#define NOPREFETCH_RANGE_MAX 32
+#endif
+
+static void insn_divmod_values_to_profile (rtx, histogram_values *);
+#ifdef HAVE_prefetch
+static bool insn_prefetch_values_to_profile (rtx, histogram_values *);
+static int find_mem_reference_1 (rtx *, void *);
+static void find_mem_reference_2 (rtx, rtx, void *);
+static bool find_mem_reference (rtx, rtx *, int *);
+#endif
+
+static void insn_values_to_profile (rtx, histogram_values *);
static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
rtx, gcov_type, int);
static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx, int);
static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
int, int, int);
+#ifdef HAVE_prefetch
+static rtx gen_speculative_prefetch (rtx, gcov_type, int);
+#endif
static bool divmod_fixed_value_transform (rtx insn);
static bool mod_pow2_value_transform (rtx);
static bool mod_subtract_transform (rtx);
+#ifdef HAVE_prefetch
+static bool speculative_prefetching_transform (rtx);
+#endif
-/* Release the list of VALUES of length N_VALUES for that we want to measure
- histograms. */
-void
-free_profiled_values (unsigned n_values ATTRIBUTE_UNUSED,
- struct histogram_value *values)
-{
- free (values);
-}
-
/* Find values inside INSN for that we want to measure histograms for
- division/modulo optimization. */
+ division/modulo optimization and store them to VALUES. */
static void
-insn_divmod_values_to_profile (rtx insn, unsigned *n_values,
- struct histogram_value **values)
+insn_divmod_values_to_profile (rtx insn, histogram_values *values)
{
rtx set, set_src, op1, op2;
enum machine_mode mode;
+ histogram_value hist;
if (!INSN_P (insn))
return;
@@ -108,30 +133,26 @@ insn_divmod_values_to_profile (rtx insn, unsigned *n_values,
/* Check for a special case where the divisor is power of 2. */
if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
{
- *values = xrealloc (*values,
- (*n_values + 1)
- * sizeof (struct histogram_value));
- (*values)[*n_values].value = op2;
- (*values)[*n_values].seq = NULL_RTX;
- (*values)[*n_values].mode = mode;
- (*values)[*n_values].insn = insn;
- (*values)[*n_values].type = HIST_TYPE_POW2;
- (*values)[*n_values].hdata.pow2.may_be_other = 1;
- (*n_values)++;
+ hist = ggc_alloc (sizeof (*hist));
+ hist->value = op2;
+ hist->seq = NULL_RTX;
+ hist->mode = mode;
+ hist->insn = insn;
+ hist->type = HIST_TYPE_POW2;
+ hist->hdata.pow2.may_be_other = 1;
+ VEC_safe_push (histogram_value, *values, hist);
}
/* Check whether the divisor is not in fact a constant. */
if (!CONSTANT_P (op2))
{
- *values = xrealloc (*values,
- (*n_values + 1)
- * sizeof (struct histogram_value));
- (*values)[*n_values].value = op2;
- (*values)[*n_values].mode = mode;
- (*values)[*n_values].seq = NULL_RTX;
- (*values)[*n_values].insn = insn;
- (*values)[*n_values].type = HIST_TYPE_SINGLE_VALUE;
- (*n_values)++;
+ hist = ggc_alloc (sizeof (*hist));
+ hist->value = op2;
+ hist->mode = mode;
+ hist->seq = NULL_RTX;
+ hist->insn = insn;
+ hist->type = HIST_TYPE_SINGLE_VALUE;
+ VEC_safe_push (histogram_value, *values, hist);
}
/* For mod, check whether it is not often a noop (or replaceable by
@@ -140,22 +161,20 @@ insn_divmod_values_to_profile (rtx insn, unsigned *n_values,
{
rtx tmp;
- *values = xrealloc (*values,
- (*n_values + 1)
- * sizeof (struct histogram_value));
+ hist = ggc_alloc (sizeof (*hist));
start_sequence ();
tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
- (*values)[*n_values].value = force_operand (tmp, NULL_RTX);
- (*values)[*n_values].seq = get_insns ();
+ hist->value = force_operand (tmp, NULL_RTX);
+ hist->seq = get_insns ();
end_sequence ();
- (*values)[*n_values].mode = mode;
- (*values)[*n_values].insn = insn;
- (*values)[*n_values].type = HIST_TYPE_INTERVAL;
- (*values)[*n_values].hdata.intvl.int_start = 0;
- (*values)[*n_values].hdata.intvl.steps = 2;
- (*values)[*n_values].hdata.intvl.may_be_less = 1;
- (*values)[*n_values].hdata.intvl.may_be_more = 1;
- (*n_values)++;
+ hist->mode = mode;
+ hist->insn = insn;
+ hist->type = HIST_TYPE_INTERVAL;
+ hist->hdata.intvl.int_start = 0;
+ hist->hdata.intvl.steps = 2;
+ hist->hdata.intvl.may_be_less = 1;
+ hist->hdata.intvl.may_be_more = 1;
+ VEC_safe_push (histogram_value, *values, hist);
}
return;
@@ -164,72 +183,162 @@ insn_divmod_values_to_profile (rtx insn, unsigned *n_values,
}
}
+#ifdef HAVE_prefetch
+
+/* Called from find_mem_reference through for_each_rtx, finds a memory
+ reference. I.e. if *EXPR is a MEM, the reference to this MEM is stored
+ to *RET and the traversing of the expression is interrupted by returning 1.
+ Otherwise 0 is returned. */
+
+static int
+find_mem_reference_1 (rtx *expr, void *ret)
+{
+ rtx *mem = ret;
+
+ if (GET_CODE (*expr) == MEM)
+ {
+ *mem = *expr;
+ return 1;
+ }
+ return 0;
+}
+
+/* Called from find_mem_reference through note_stores to find out whether
+ the memory reference MEM is a store. I.e. if EXPR == MEM, the variable
+ FMR2_WRITE is set to true. */
+
+static int fmr2_write;
+static void
+find_mem_reference_2 (rtx expr, rtx pat ATTRIBUTE_UNUSED, void *mem)
+{
+ if (expr == mem)
+ fmr2_write = true;
+}
+
+/* Find a memory reference inside INSN, return it in MEM. Set WRITE to true
+ if it is a write of the mem. Return false if no memory reference is found,
+ true otherwise. */
+
+static bool
+find_mem_reference (rtx insn, rtx *mem, int *write)
+{
+ *mem = NULL_RTX;
+ for_each_rtx (&PATTERN (insn), find_mem_reference_1, mem);
+
+ if (!*mem)
+ return false;
+
+ fmr2_write = false;
+ note_stores (PATTERN (insn), find_mem_reference_2, *mem);
+ *write = fmr2_write;
+ return true;
+}
+
+/* Find values inside INSN for that we want to measure histograms for
+ a speculative prefetching. Add them to the list VALUES.
+ Returns true if we found any such value, false otherwise. */
+
+static bool
+insn_prefetch_values_to_profile (rtx insn, histogram_values *values)
+{
+ rtx mem, address;
+ int write;
+ histogram_value hist;
+
+ if (!INSN_P (insn))
+ return false;
+
+ if (!find_mem_reference (insn, &mem, &write))
+ return false;
+
+ address = XEXP (mem, 0);
+ if (side_effects_p (address))
+ return false;
+
+ if (CONSTANT_P (address))
+ return false;
+
+ hist = ggc_alloc (sizeof (*hist));
+ hist->value = address;
+ hist->mode = GET_MODE (address);
+ hist->seq = NULL_RTX;
+ hist->insn = insn;
+ hist->type = HIST_TYPE_CONST_DELTA;
+ VEC_safe_push (histogram_value, *values, hist);
+
+ return true;
+}
+#endif
/* Find values inside INSN for that we want to measure histograms and adds
them to list VALUES (increasing the record of its length in N_VALUES). */
static void
-insn_values_to_profile (rtx insn,
- unsigned *n_values,
- struct histogram_value **values)
+insn_values_to_profile (rtx insn, histogram_values *values)
{
if (flag_value_profile_transformations)
- insn_divmod_values_to_profile (insn, n_values, values);
+ insn_divmod_values_to_profile (insn, values);
+
+#ifdef HAVE_prefetch
+ if (flag_speculative_prefetching)
+ insn_prefetch_values_to_profile (insn, values);
+#endif
}
/* Find list of values for that we want to measure histograms. */
static void
-rtl_find_values_to_profile (unsigned *n_values, struct histogram_value **values)
+rtl_find_values_to_profile (histogram_values *values)
{
rtx insn;
unsigned i;
life_analysis (NULL, PROP_DEATH_NOTES);
- *n_values = 0;
- *values = NULL;
+ *values = VEC_alloc (histogram_value, 0);
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
- insn_values_to_profile (insn, n_values, values);
+ insn_values_to_profile (insn, values);
- for (i = 0; i < *n_values; i++)
+ for (i = 0; i < VEC_length (histogram_value, *values); i++)
{
- switch ((*values)[i].type)
+ histogram_value hist = VEC_index (histogram_value, *values, i);
+
+ switch (hist->type)
{
case HIST_TYPE_INTERVAL:
if (dump_file)
fprintf (dump_file,
"Interval counter for insn %d, range %d -- %d.\n",
- INSN_UID ((rtx)(*values)[i].insn),
- (*values)[i].hdata.intvl.int_start,
- ((*values)[i].hdata.intvl.int_start
- + (*values)[i].hdata.intvl.steps - 1));
- (*values)[i].n_counters = (*values)[i].hdata.intvl.steps +
- ((*values)[i].hdata.intvl.may_be_less ? 1 : 0) +
- ((*values)[i].hdata.intvl.may_be_more ? 1 : 0);
+ INSN_UID ((rtx)hist->insn),
+ hist->hdata.intvl.int_start,
+ (hist->hdata.intvl.int_start
+ + hist->hdata.intvl.steps - 1));
+ hist->n_counters = hist->hdata.intvl.steps +
+ (hist->hdata.intvl.may_be_less ? 1 : 0) +
+ (hist->hdata.intvl.may_be_more ? 1 : 0);
break;
case HIST_TYPE_POW2:
if (dump_file)
fprintf (dump_file,
"Pow2 counter for insn %d.\n",
- INSN_UID ((rtx)(*values)[i].insn));
- (*values)[i].n_counters
- = GET_MODE_BITSIZE ((*values)[i].mode)
- + ((*values)[i].hdata.pow2.may_be_other ? 1 : 0);
+ INSN_UID ((rtx)hist->insn));
+ hist->n_counters
+ = GET_MODE_BITSIZE (hist->mode)
+ + (hist->hdata.pow2.may_be_other ? 1 : 0);
break;
case HIST_TYPE_SINGLE_VALUE:
if (dump_file)
fprintf (dump_file,
"Single value counter for insn %d.\n",
- INSN_UID ((rtx)(*values)[i].insn));
- (*values)[i].n_counters = 3;
+ INSN_UID ((rtx)hist->insn));
+ hist->n_counters = 3;
break;
case HIST_TYPE_CONST_DELTA:
if (dump_file)
fprintf (dump_file,
"Constant delta counter for insn %d.\n",
- INSN_UID ((rtx)(*values)[i].insn));
- (*values)[i].n_counters = 4;
+ INSN_UID ((rtx)hist->insn));
+ hist->n_counters = 4;
break;
default:
@@ -300,6 +409,23 @@ rtl_find_values_to_profile (unsigned *n_values, struct histogram_value **values)
It would be possible to continue analogically for K * b for other small
K's, but it is probably not useful.
+ 5)
+
+ Read or write of mem[address], where the value of address usually changes
+ by a constant C != 0 between successive evaluations of the address; with
+ -fspeculative-prefetching we then add a prefetch of address + C before
+ the insn. This handles prefetching of several interesting cases in addition
+ to a simple prefetching for addresses that are induction variables, e.g.
+ linked lists allocated sequentially (even in case they are processed
+ recursively).
+
+ TODO -- we should also check whether there is not (usually) a small
+ difference with the adjacent memory references, so that we do
+ not issue overlapping prefetches. Also we should employ some
+ heuristics to eliminate cases where prefetching evidently spoils
+ the code.
+ -- it should somehow cooperate with the loop optimizer prefetching
+
TODO:
There are other useful cases that could be handled by a similar mechanism,
@@ -353,6 +479,11 @@ rtl_value_profile_transformations (void)
|| divmod_fixed_value_transform (insn)
|| mod_pow2_value_transform (insn)))
changed = true;
+#ifdef HAVE_prefetch
+ if (flag_speculative_prefetching
+ && speculative_prefetching_transform (insn))
+ changed = true;
+#endif
}
if (changed)
@@ -754,12 +885,118 @@ mod_subtract_transform (rtx insn)
return true;
}
+
+#ifdef HAVE_prefetch
+/* Generate code for transformation 5 for mem with ADDRESS and a constant
+ step DELTA. WRITE is true if the reference is a store to mem. */
+
+static rtx
+gen_speculative_prefetch (rtx address, gcov_type delta, int write)
+{
+ rtx tmp;
+ rtx sequence;
+
+ /* TODO: we do the prefetching for just one iteration ahead, which
+ often is not enough. */
+ start_sequence ();
+ if (offsettable_address_p (0, VOIDmode, address))
+ tmp = plus_constant (copy_rtx (address), delta);
+ else
+ {
+ tmp = simplify_gen_binary (PLUS, Pmode,
+ copy_rtx (address), GEN_INT (delta));
+ tmp = force_operand (tmp, NULL);
+ }
+ if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
+ (tmp, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
+ tmp = force_reg (Pmode, tmp);
+ emit_insn (gen_prefetch (tmp, GEN_INT (write), GEN_INT (3)));
+ sequence = get_insns ();
+ end_sequence ();
+
+ return sequence;
+}
+
+/* Do transform 5) on INSN if applicable. */
+
+static bool
+speculative_prefetching_transform (rtx insn)
+{
+ rtx histogram, value;
+ gcov_type val, count, all;
+ edge e;
+ rtx mem, address;
+ int write;
+
+ if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
+ return false;
+
+ if (!find_mem_reference (insn, &mem, &write))
+ return false;
+
+ address = XEXP (mem, 0);
+ if (side_effects_p (address))
+ return false;
+
+ if (CONSTANT_P (address))
+ return false;
+
+ for (histogram = REG_NOTES (insn);
+ histogram;
+ histogram = XEXP (histogram, 1))
+ if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
+ && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA))
+ break;
+
+ if (!histogram)
+ return false;
+
+ histogram = XEXP (XEXP (histogram, 0), 1);
+ value = XEXP (histogram, 0);
+ histogram = XEXP (histogram, 1);
+ /* Skip last value referenced. */
+ histogram = XEXP (histogram, 1);
+ val = INTVAL (XEXP (histogram, 0));
+ histogram = XEXP (histogram, 1);
+ count = INTVAL (XEXP (histogram, 0));
+ histogram = XEXP (histogram, 1);
+ all = INTVAL (XEXP (histogram, 0));
+
+ /* With so few executions we do not really have a reason to optimize the
+ statement, and more importantly, the data about differences of addresses
+ are spoiled by the first item that had no previous value to compare
+ with. */
+ if (all < 4)
+ return false;
+
+ /* We require that count is at least half of all; this means
+ that for the transformation to fire the value must be constant
+ at least 50% of the time (and 75% gives the guarantee of usage). */
+ if (!rtx_equal_p (address, value) || 2 * count < all)
+ return false;
+
+ /* If the difference is too small, it does not make too much sense to
+ prefetch, as the memory is probably already in cache. */
+ if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX)
+ return false;
+
+ if (dump_file)
+ fprintf (dump_file, "Speculative prefetching for insn %d\n",
+ INSN_UID (insn));
+
+ e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
+
+ insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e);
+
+ return true;
+}
+#endif /* HAVE_prefetch */
/* Connection to the outside world. */
/* Struct for IR-dependent hooks. */
struct value_prof_hooks {
/* Find list of values for which we want to measure histograms. */
- void (*find_values_to_profile) (unsigned *, struct histogram_value **);
+ void (*find_values_to_profile) (histogram_values *);
/* Identify and exploit properties of values that are hard to analyze
statically. See value-prof.c for more detail. */
@@ -783,10 +1020,8 @@ rtl_register_value_prof_hooks (void)
/* Tree-based versions are stubs for now. */
static void
-tree_find_values_to_profile (unsigned *n_values, struct histogram_value **values)
+tree_find_values_to_profile (histogram_values *values ATTRIBUTE_UNUSED)
{
- (void)n_values;
- (void)values;
abort ();
}
@@ -811,9 +1046,9 @@ tree_register_value_prof_hooks (void)
/* IR-independent entry points. */
void
-find_values_to_profile (unsigned *n_values, struct histogram_value **values)
+find_values_to_profile (histogram_values *values)
{
- (value_prof_hooks->find_values_to_profile) (n_values, values);
+ (value_prof_hooks->find_values_to_profile) (values);
}
bool
diff --git a/gcc/value-prof.h b/gcc/value-prof.h
index e71e5e5..60215fd 100644
--- a/gcc/value-prof.h
+++ b/gcc/value-prof.h
@@ -39,14 +39,15 @@ enum hist_type
/* The value to measure. */
/* The void *'s are either rtx or tree, depending on which IR is in use. */
-struct histogram_value
+struct histogram_value_t GTY(())
{
- void * value; /* The value to profile. */
- enum machine_mode mode; /* And its mode. */
- void * seq; /* Insns required to count the profiled value. */
- void * insn; /* Insn before that to measure. */
- enum hist_type type; /* Type of information to measure. */
- unsigned n_counters; /* Number of required counters. */
+ PTR GTY ((skip (""))) value; /* The value to profile. */
+ enum machine_mode mode; /* And its mode. */
+ PTR GTY ((skip (""))) seq; /* Insns required to count the
+ profiled value. */
+ PTR GTY ((skip (""))) insn; /* Insn before that to measure. */
+ enum hist_type type; /* Type of information to measure. */
+ unsigned n_counters; /* Number of required counters. */
union
{
struct
@@ -63,13 +64,18 @@ struct histogram_value
} hdata; /* Profiled information specific data. */
};
+typedef struct histogram_value_t *histogram_value;
+
+DEF_VEC_P(histogram_value);
+
+typedef VEC(histogram_value) *histogram_values;
+
/* Hooks registration. */
extern void rtl_register_value_prof_hooks (void);
extern void tree_register_value_prof_hooks (void);
/* IR-independent entry points. */
-extern void find_values_to_profile (unsigned *, struct histogram_value **);
-extern void free_profiled_values (unsigned, struct histogram_value *);
+extern void find_values_to_profile (histogram_values *);
extern bool value_profile_transformations (void);
/* External declarations for edge-based profiling. */
@@ -78,18 +84,17 @@ struct profile_hooks {
void (*gen_edge_profiler) (int, edge);
/* Insert code to increment the interval histogram counter. */
- void (*gen_interval_profiler) (struct histogram_value *, unsigned, unsigned);
+ void (*gen_interval_profiler) (histogram_value, unsigned, unsigned);
/* Insert code to increment the power of two histogram counter. */
- void (*gen_pow2_profiler) (struct histogram_value *, unsigned, unsigned);
+ void (*gen_pow2_profiler) (histogram_value, unsigned, unsigned);
/* Insert code to find the most common value. */
- void (*gen_one_value_profiler) (struct histogram_value *, unsigned, unsigned);
+ void (*gen_one_value_profiler) (histogram_value, unsigned, unsigned);
/* Insert code to find the most common value of a difference between two
evaluations of an expression. */
- void (*gen_const_delta_profiler) (struct histogram_value *, unsigned,
- unsigned);
+ void (*gen_const_delta_profiler) (histogram_value, unsigned, unsigned);
FILE * (*profile_dump_file) (void);
};