aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 20
-rw-r--r-- gcc/config/aarch64/aarch64-protos.h 3
-rw-r--r-- gcc/config/aarch64/aarch64.c 13
-rw-r--r-- gcc/doc/invoke.texi 15
-rw-r--r-- gcc/params.def 9
-rw-r--r-- gcc/params.h 2
-rw-r--r-- gcc/tree-ssa-loop-prefetch.c 17
7 files changed, 78 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 12895be..403ee0e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2018-05-23 Luis Machado <luis.machado@linaro.org>
+
+ * config/aarch64/aarch64-protos.h (cpu_prefetch_tune)
+ <minimum_stride>: New const int field.
+ * config/aarch64/aarch64.c (generic_prefetch_tune): Update to include
+ minimum_stride field defaulting to -1.
+ (exynosm1_prefetch_tune): Likewise.
+ (thunderxt88_prefetch_tune): Likewise.
+ (thunderx_prefetch_tune): Likewise.
+ (thunderx2t99_prefetch_tune): Likewise.
+ (qdf24xx_prefetch_tune) <minimum_stride>: Set to 2048.
+ <default_opt_level>: Set to 3.
+ (aarch64_override_options_internal): Update to set
+ PARAM_PREFETCH_MINIMUM_STRIDE.
+ * doc/invoke.texi (prefetch-minimum-stride): Document new option.
+ * params.def (PARAM_PREFETCH_MINIMUM_STRIDE): New.
+ * params.h (PARAM_PREFETCH_MINIMUM_STRIDE): Define.
+ * tree-ssa-loop-prefetch.c (should_issue_prefetch_p): Return false if
+ stride is constant and is below the minimum stride threshold.
+
2018-05-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/arm/arm-cpus.in (mode26): Delete.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 4ab6811..7e9fe3a 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -230,6 +230,9 @@ struct cpu_prefetch_tune
const int l1_cache_size;
const int l1_cache_line_size;
const int l2_cache_size;
+ /* The minimum constant stride at or above which prefetch hints
+ should be issued. */
+ const int minimum_stride;
const int default_opt_level;
};
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 7745192..561c407 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -550,6 +550,7 @@ static const cpu_prefetch_tune generic_prefetch_tune =
-1, /* l1_cache_size */
-1, /* l1_cache_line_size */
-1, /* l2_cache_size */
+ -1, /* minimum_stride */
-1 /* default_opt_level */
};
@@ -559,6 +560,7 @@ static const cpu_prefetch_tune exynosm1_prefetch_tune =
-1, /* l1_cache_size */
64, /* l1_cache_line_size */
-1, /* l2_cache_size */
+ -1, /* minimum_stride */
-1 /* default_opt_level */
};
@@ -568,7 +570,8 @@ static const cpu_prefetch_tune qdf24xx_prefetch_tune =
32, /* l1_cache_size */
64, /* l1_cache_line_size */
512, /* l2_cache_size */
- -1 /* default_opt_level */
+ 2048, /* minimum_stride */
+ 3 /* default_opt_level */
};
static const cpu_prefetch_tune thunderxt88_prefetch_tune =
@@ -577,6 +580,7 @@ static const cpu_prefetch_tune thunderxt88_prefetch_tune =
32, /* l1_cache_size */
128, /* l1_cache_line_size */
16*1024, /* l2_cache_size */
+ -1, /* minimum_stride */
3 /* default_opt_level */
};
@@ -586,6 +590,7 @@ static const cpu_prefetch_tune thunderx_prefetch_tune =
32, /* l1_cache_size */
128, /* l1_cache_line_size */
-1, /* l2_cache_size */
+ -1, /* minimum_stride */
-1 /* default_opt_level */
};
@@ -595,6 +600,7 @@ static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
32, /* l1_cache_size */
64, /* l1_cache_line_size */
256, /* l2_cache_size */
+ -1, /* minimum_stride */
-1 /* default_opt_level */
};
@@ -10629,6 +10635,11 @@ aarch64_override_options_internal (struct gcc_options *opts)
aarch64_tune_params.prefetch->l2_cache_size,
opts->x_param_values,
global_options_set.x_param_values);
+ if (aarch64_tune_params.prefetch->minimum_stride >= 0)
+ maybe_set_param_value (PARAM_PREFETCH_MINIMUM_STRIDE,
+ aarch64_tune_params.prefetch->minimum_stride,
+ opts->x_param_values,
+ global_options_set.x_param_values);
/* Use the alternative scheduling-pressure algorithm by default. */
maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 30d2a31..d1e50a0 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -10734,6 +10734,21 @@ The size of L1 cache, in kilobytes.
@item l2-cache-size
The size of L2 cache, in kilobytes.
+@item prefetch-minimum-stride
+Minimum constant stride, in bytes, at which to start issuing prefetch hints.
+If the stride is less than this threshold, prefetch hints will not be issued.
+
+This setting is useful for processors that have hardware prefetchers, in
+which case there may be conflicts between the hardware prefetchers and
+the software prefetchers. If the hardware prefetchers have a maximum
+stride they can handle, it should be used here to improve the use of
+software prefetchers.
+
+A value of -1, the default, means we don't have a threshold and therefore
+prefetch hints can be issued for any constant stride.
+
+This setting is only useful for strides that are known and constant.
+
@item loop-interchange-max-num-stmts
The maximum number of stmts in a loop to be interchanged.
diff --git a/gcc/params.def b/gcc/params.def
index 5c4e2c9..6b208bf 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -795,6 +795,15 @@ DEFPARAM (PARAM_L2_CACHE_SIZE,
"The size of L2 cache.",
512, 0, 0)
+/* The minimum constant stride at or above which prefetch hints
+ should be issued. */
+
+DEFPARAM (PARAM_PREFETCH_MINIMUM_STRIDE,
+ "prefetch-minimum-stride",
+ "The minimum constant stride beyond which we should use prefetch "
+ "hints for.",
+ -1, 0, 0)
+
/* Maximum number of statements in loop nest for loop interchange. */
DEFPARAM (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS,
diff --git a/gcc/params.h b/gcc/params.h
index 98249d2..96012db 100644
--- a/gcc/params.h
+++ b/gcc/params.h
@@ -196,6 +196,8 @@ extern void init_param_values (int *params);
PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
#define L2_CACHE_SIZE \
PARAM_VALUE (PARAM_L2_CACHE_SIZE)
+#define PREFETCH_MINIMUM_STRIDE \
+ PARAM_VALUE (PARAM_PREFETCH_MINIMUM_STRIDE)
#define USE_CANONICAL_TYPES \
PARAM_VALUE (PARAM_USE_CANONICAL_TYPES)
#define IRA_MAX_LOOPS_NUM \
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
index 2f10db1..ac89bf7 100644
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -992,6 +992,23 @@ prune_by_reuse (struct mem_ref_group *groups)
static bool
should_issue_prefetch_p (struct mem_ref *ref)
{
+ /* Some processors may have a hardware prefetcher that may conflict with
+ prefetch hints for a range of strides. Make sure we don't issue
+ prefetches for such cases if the stride is within this particular
+ range. */
+ if (cst_and_fits_in_hwi (ref->group->step)
+ && abs_hwi (int_cst_value (ref->group->step))
+ < (HOST_WIDE_INT) PREFETCH_MINIMUM_STRIDE)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Step for reference %u:%u (%ld) is less than the mininum "
+ "required stride of %d\n",
+ ref->group->uid, ref->uid, int_cst_value (ref->group->step),
+ PREFETCH_MINIMUM_STRIDE);
+ return false;
+ }
+
/* For now do not issue prefetches for only first few of the
iterations. */
if (ref->prefetch_before != PREFETCH_ALL)