diff options
-rw-r--r-- | gcc/ChangeLog | 34 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 14 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 6 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.c | 5 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.h | 13 | ||||
-rw-r--r-- | gcc/config/sh/sh.h | 4 | ||||
-rw-r--r-- | gcc/config/sparc/sparc.c | 15 | ||||
-rw-r--r-- | gcc/config/sparc/sparc.h | 13 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 15 | ||||
-rw-r--r-- | gcc/params.c | 5 | ||||
-rw-r--r-- | gcc/params.def | 31 | ||||
-rw-r--r-- | gcc/params.h | 15 | ||||
-rw-r--r-- | gcc/toplev.c | 4 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-prefetch.c | 32 |
14 files changed, 153 insertions, 53 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e1a2a65..d3363d1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,37 @@ +2006-11-12 Zdenek Dvorak <dvorakz@suse.cz> + + * params.c (set_param_value): Initialize the "set" field. + * params.h (struct param_info): Add "set" field. + (PARAM_SET_P): New macro. + (PREFETCH_LATENCY, SIMULTANEOUS_PREFETCHES, L1_CACHE_SIZE, + L1_CACHE_LINE_SIZE): New macros. + * toplev.c (DEFPARAM): Initialize the "set" field. + * tree-ssa-loop-prefetch.c (PREFETCH_LATENCY, + SIMULTANEOUS_PREFETCHES): Removed. + (PREFETCH_BLOCK): Use L1_CACHE_LINE_SIZE. + (tree_ssa_prefetch_arrays): Dump the values of the parameters. + * config/sparc/sparc.c: Include params.h. + (sparc_override_options): Set SIMULTANEOUS_PREFETCHES and + L1_CACHE_LINE_SIZE parameters. + * config/sparc/sparc.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES): + Removed. + * config/i386/i386.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES): + Removed. + * config/i386/i386.c: Include params.h. + (k8_cost): Change default value for SIMULTANEOUS_PREFETCHES. + (override_options): Set SIMULTANEOUS_PREFETCHES and + L1_CACHE_LINE_SIZE parameters. + * config/sh/sh.h (SIMULTANEOUS_PREFETCHES): Removed. + (OPTIMIZATION_OPTIONS): Set SIMULTANEOUS_PREFETCHES and + L1_CACHE_LINE_SIZE parameters. + * config/ia64/ia64.c (ia64_optimization_options): Set + SIMULTANEOUS_PREFETCHES and L1_CACHE_LINE_SIZE parameters. + * config/ia64/ia64.h (SIMULTANEOUS_PREFETCHES, PREFETCH_BLOCK): + Removed. + * params.def (PARAM_PREFETCH_LATENCY, PARAM_SIMULTANEOUS_PREFETCHES, + PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE): New params. + * doc/invoke.texi: Document new params. + 2006-11-12 Roger Sayle <roger@eyesopen.com> PR tree-optimization/13827 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c797ff7..74af060d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -50,6 +50,7 @@ Boston, MA 02110-1301, USA. 
*/ #include "tree-gimple.h" #include "dwarf2.h" #include "tm-constrs.h" +#include "params.h" #ifndef CHECK_STACK_LIMIT #define CHECK_STACK_LIMIT (-1) @@ -536,7 +537,12 @@ struct processor_costs k8_cost = { in SImode, DImode and TImode */ 5, /* MMX or SSE register to integer */ 64, /* size of prefetch block */ - 6, /* number of parallel prefetches */ + /* New AMD processors never drop prefetches; if they cannot be performed + immediately, they are queued. We set number of simultaneous prefetches + to a large constant to reflect this (it probably is not a good idea not + to limit number of prefetches at all, as their execution also takes some + time). */ + 100, /* number of parallel prefetches */ 5, /* Branch cost */ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ COSTS_N_INSNS (4), /* cost of FMUL instruction. */ @@ -2063,6 +2069,12 @@ override_options (void) so it won't slow down the compilation and make x87 code slower. */ if (!TARGET_SCHEDULE) flag_schedule_insns_after_reload = flag_schedule_insns = 0; + + if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES)) + set_param_value ("simultaneous-prefetches", + ix86_cost->simultaneous_prefetches); + if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE)) + set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block); } /* switch to the appropriate section for output of DECL. diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index b40088a..c7115109 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1739,12 +1739,6 @@ do { \ /* Define this as 1 if `char' should by default be signed; else as 0. */ #define DEFAULT_SIGNED_CHAR 1 -/* Number of bytes moved into a data cache for a single prefetch operation. */ -#define PREFETCH_BLOCK ix86_cost->prefetch_block - -/* Number of prefetch operations that can be done in parallel. */ -#define SIMULTANEOUS_PREFETCHES ix86_cost->simultaneous_prefetches - /* Max number of bytes we can move from memory to memory in one reasonably fast instruction. 
*/ #define MOVE_MAX 16 diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index 2867e6f..53d90d2 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -9798,6 +9798,11 @@ ia64_optimization_options (int level ATTRIBUTE_UNUSED, { /* Let the scheduler form additional regions. */ set_param_value ("max-sched-extend-regions-iters", 2); + + /* Set the default values for cache-related parameters. */ + set_param_value ("simultaneous-prefetches", 6); + set_param_value ("l1-cache-line-size", 32); + } #include "gt-ia64.h" diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h index c9bb417..327a313 100644 --- a/gcc/config/ia64/ia64.h +++ b/gcc/config/ia64/ia64.h @@ -1979,19 +1979,6 @@ do { \ #pragma weak. Note, #pragma weak will only be supported if SUPPORT_WEAK is defined. */ -/* If this architecture supports prefetch, define this to be the number of - prefetch commands that can be executed in parallel. - - ??? This number is bogus and needs to be replaced before the value is - actually used in optimizations. */ - -#define SIMULTANEOUS_PREFETCHES 6 - -/* If this architecture supports prefetch, define this to be the size of - the cache line that is prefetched. */ - -#define PREFETCH_BLOCK 32 - #define HANDLE_SYSV_PRAGMA 1 /* A C expression for the maximum number of instructions to execute via diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index 1b659c7..5f8412f 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -495,6 +495,8 @@ do { \ the user explicitly requested this to be on or off. */ \ if (flag_schedule_insns > 0) \ flag_schedule_insns = 2; \ + \ + set_param_value ("simultaneous-prefetches", 2); \ } while (0) #define ASSEMBLER_DIALECT assembler_dialect @@ -3467,8 +3469,6 @@ extern int current_function_interrupt; 2:\n" TEXT_SECTION_ASM_OP); #endif /* (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__ */ -#define SIMULTANEOUS_PREFETCHES 2 - /* FIXME: middle-end support for highpart optimizations is missing. 
*/ #define high_life_started reload_in_progress diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index 7ee5bcd..b26b59b 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -51,6 +51,7 @@ Boston, MA 02110-1301, USA. */ #include "cfglayout.h" #include "tree-gimple.h" #include "langhooks.h" +#include "params.h" /* Processor costs */ static const @@ -827,6 +828,20 @@ sparc_override_options (void) if (!(target_flags_explicit & MASK_LONG_DOUBLE_128)) target_flags |= MASK_LONG_DOUBLE_128; #endif + + if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES)) + set_param_value ("simultaneous-prefetches", + ((sparc_cpu == PROCESSOR_ULTRASPARC + || sparc_cpu == PROCESSOR_NIAGARA) + ? 2 + : (sparc_cpu == PROCESSOR_ULTRASPARC3 + ? 8 : 3))); + if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE)) + set_param_value ("l1-cache-line-size", + ((sparc_cpu == PROCESSOR_ULTRASPARC + || sparc_cpu == PROCESSOR_ULTRASPARC3 + || sparc_cpu == PROCESSOR_NIAGARA) + ? 64 : 32)); } #ifdef SUBTARGET_ATTRIBUTE_TABLE diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h index 8a2121c..936fac8 100644 --- a/gcc/config/sparc/sparc.h +++ b/gcc/config/sparc/sparc.h @@ -2175,19 +2175,6 @@ do { \ : (sparc_cpu == PROCESSOR_NIAGARA \ ? 4 \ : 3))) - -#define PREFETCH_BLOCK \ - ((sparc_cpu == PROCESSOR_ULTRASPARC \ - || sparc_cpu == PROCESSOR_ULTRASPARC3 \ - || sparc_cpu == PROCESSOR_NIAGARA) \ - ? 64 : 32) - -#define SIMULTANEOUS_PREFETCHES \ - ((sparc_cpu == PROCESSOR_ULTRASPARC \ - || sparc_cpu == PROCESSOR_NIAGARA) \ - ? 2 \ - : (sparc_cpu == PROCESSOR_ULTRASPARC3 \ - ? 8 : 3)) /* Control the assembler format that we output. */ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 8c9174e..544467e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -6329,6 +6329,21 @@ duplicated when threading jumps. Maximum number of fields in a structure we will treat in a field sensitive manner during pointer analysis. 
+@item prefetch-latency +Estimate on average number of instructions that are executed before +prefetch finishes. The distance we prefetch ahead is proportional +to this constant. Increasing this number may also lead to fewer +streams being prefetched (see @option{simultaneous-prefetches}). + +@item simultaneous-prefetches +Maximum number of prefetches that can run at the same time. + +@item l1-cache-line-size +The size of cache line in L1 cache, in bytes. + +@item l1-cache-size +The number of cache lines in L1 cache. + @end table @end table diff --git a/gcc/params.c b/gcc/params.c index 40daae7..fee0657 100644 --- a/gcc/params.c +++ b/gcc/params.c @@ -77,7 +77,10 @@ set_param_value (const char *name, int value) compiler_params[i].option, compiler_params[i].max_value); else - compiler_params[i].value = value; + { + compiler_params[i].value = value; + compiler_params[i].set = true; + } return; } diff --git a/gcc/params.def b/gcc/params.def index 488a4a9..764b3c4 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -594,6 +594,37 @@ DEFPARAM(PARAM_MAX_SCHED_READY_INSNS, "The maximum number of instructions ready to be issued to be considered by the scheduler during the first scheduling pass", 100, 0, 0) +/* Prefetching and cache-optimizations related parameters. Default values are + usually set by machine description. */ + +/* The number of insns executed before prefetch is completed. */ + +DEFPARAM (PARAM_PREFETCH_LATENCY, + "prefetch-latency", + "The number of insns executed before prefetch is completed", + 200, 0, 0) + +/* The number of prefetches that can run at the same time. */ + +DEFPARAM (PARAM_SIMULTANEOUS_PREFETCHES, + "simultaneous-prefetches", + "The number of prefetches that can run at the same time", + 3, 0, 0) + +/* The size of L1 cache in number of cache lines. */ + +DEFPARAM (PARAM_L1_CACHE_SIZE, + "l1-cache-size", + "The size of L1 cache", + 1024, 0, 0) + +/* The size of L1 cache line in bytes. 
*/ + +DEFPARAM (PARAM_L1_CACHE_LINE_SIZE, + "l1-cache-line-size", + "The size of L1 cache line", + 32, 0, 0) + /* Local variables: mode:c diff --git a/gcc/params.h b/gcc/params.h index 32c9c8c..0d99755 100644 --- a/gcc/params.h +++ b/gcc/params.h @@ -49,6 +49,9 @@ typedef struct param_info /* The associated value. */ int value; + /* True if the parameter was explicitly set. */ + bool set; + /* Minimum acceptable value. */ int min_value; @@ -88,6 +91,10 @@ typedef enum compiler_param #define PARAM_VALUE(ENUM) \ (compiler_params[(int) ENUM].value) +/* True if the value of the parameter was explicitly changed. */ +#define PARAM_SET_P(ENUM) \ + (compiler_params[(int) ENUM].set) + /* Macros for the various parameters. */ #define SALIAS_MAX_IMPLICIT_FIELDS \ PARAM_VALUE (PARAM_SALIAS_MAX_IMPLICIT_FIELDS) @@ -151,4 +158,12 @@ typedef enum compiler_param ((size_t) PARAM_VALUE (PARAM_MAX_FIELDS_FOR_FIELD_SENSITIVE)) #define MAX_SCHED_READY_INSNS \ PARAM_VALUE (PARAM_MAX_SCHED_READY_INSNS) +#define PREFETCH_LATENCY \ + PARAM_VALUE (PARAM_PREFETCH_LATENCY) +#define SIMULTANEOUS_PREFETCHES \ + PARAM_VALUE (PARAM_SIMULTANEOUS_PREFETCHES) +#define L1_CACHE_SIZE \ + PARAM_VALUE (PARAM_L1_CACHE_SIZE) +#define L1_CACHE_LINE_SIZE \ + PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE) #endif /* ! 
GCC_PARAMS_H */ diff --git a/gcc/toplev.c b/gcc/toplev.c index 0f24ce3..b074a9f 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -387,10 +387,10 @@ const char *user_label_prefix; static const param_info lang_independent_params[] = { #define DEFPARAM(ENUM, OPTION, HELP, DEFAULT, MIN, MAX) \ - { OPTION, DEFAULT, MIN, MAX, HELP }, + { OPTION, DEFAULT, false, MIN, MAX, HELP }, #include "params.def" #undef DEFPARAM - { NULL, 0, 0, 0, NULL } + { NULL, 0, false, 0, 0, NULL } }; /* Output files for assembler code (real compiler output) diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index f34214e..41ada26 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -115,19 +115,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA /* Magic constants follow. These should be replaced by machine specific numbers. */ -/* A number that should roughly correspond to the number of instructions - executed before the prefetch is completed. */ - -#ifndef PREFETCH_LATENCY -#define PREFETCH_LATENCY 200 -#endif - -/* Number of prefetches that can run at the same time. */ - -#ifndef SIMULTANEOUS_PREFETCHES -#define SIMULTANEOUS_PREFETCHES 3 -#endif - /* True if write can be prefetched by a read prefetch. */ #ifndef WRITE_CAN_USE_READ_PREFETCH @@ -140,10 +127,12 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #define READ_CAN_USE_WRITE_PREFETCH 0 #endif -/* Cache line size. Assumed to be a power of two. */ +/* The size of the block loaded by a single prefetch. Usually, this is + the same as cache line size (at the moment, we only consider one level + of cache hierarchy). */ #ifndef PREFETCH_BLOCK -#define PREFETCH_BLOCK 32 +#define PREFETCH_BLOCK L1_CACHE_LINE_SIZE #endif /* Do we have a forward hardware sequential prefetching? 
*/ @@ -1026,6 +1015,19 @@ tree_ssa_prefetch_arrays (struct loops *loops) || PREFETCH_BLOCK == 0) return 0; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Prefetching parameters:\n"); + fprintf (dump_file, " simultaneous prefetches: %d\n", + SIMULTANEOUS_PREFETCHES); + fprintf (dump_file, " prefetch latency: %d\n", PREFETCH_LATENCY); + fprintf (dump_file, " L1 cache size: %d (%d bytes)\n", + L1_CACHE_SIZE, L1_CACHE_SIZE * L1_CACHE_LINE_SIZE); + fprintf (dump_file, " L1 cache line size: %d\n", L1_CACHE_LINE_SIZE); + fprintf (dump_file, " prefetch block size: %d\n", PREFETCH_BLOCK); + fprintf (dump_file, "\n"); + } + initialize_original_copy_tables (); if (!built_in_decls[BUILT_IN_PREFETCH]) |