aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorMaxim Kuvyrkov <maxim.kuvyrkov@linaro.org>2017-06-16 09:30:43 +0000
committerMaxim Kuvyrkov <mkuvyrkov@gcc.gnu.org>2017-06-16 09:30:43 +0000
commit9d2c6e2eb72324b10160ce1117a8ab600be94bad (patch)
treef1b7174e6289a48cb0e9b65a00ac1eca952bec5e /gcc
parentb783399af92647bd0765b9d51afab04dea31344b (diff)
downloadgcc-9d2c6e2eb72324b10160ce1117a8ab600be94bad.zip
gcc-9d2c6e2eb72324b10160ce1117a8ab600be94bad.tar.gz
gcc-9d2c6e2eb72324b10160ce1117a8ab600be94bad.tar.bz2
Add prefetch configuration to aarch64 backend.
* config/aarch64/aarch64-protos.h (struct cpu_prefetch_tune): New tune structure. (struct tune_params): Use cpu_prefetch_tune instead of cache_line_size. [Unrelated to main purpose of the patch] Place the pointer field last to enable type checking errors when tune structures are wrongly merged. * config/aarch64/aarch64.c (generic_prefetch_tune,) (exynosm1_prefetch_tune, qdf24xx_prefetch_tune,) (thunderx2t99_prefetch_tune): New tune constants. (tune_params *_tunings): Update all tunings (no functional change). (aarch64_override_options_internal): Set PARAM_SIMULTANEOUS_PREFETCHES, PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE, and PARAM_L2_CACHE_SIZE from tunings structures. Change-Id: I1ddbac1863dcf078a2e5b14dd904debc76a7da94 From-SVN: r249240
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog15
-rw-r--r--gcc/config/aarch64/aarch64-protos.h16
-rw-r--r--gcc/config/aarch64/aarch64.c102
3 files changed, 105 insertions, 28 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d6f2d72..a78e736 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2017-06-16 Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>
+
+ * config/aarch64/aarch64-protos.h (struct cpu_prefetch_tune):
+ New tune structure.
+ (struct tune_params): Use cpu_prefetch_tune instead of cache_line_size.
+ [Unrelated to main purpose of the patch] Place the pointer field last
+ to enable type checking errors when tune structures are wrongly merged.
+ * config/aarch64/aarch64.c (generic_prefetch_tune,)
+ (exynosm1_prefetch_tune, qdf24xx_prefetch_tune,)
+ (thunderx2t99_prefetch_tune): New tune constants.
+ (tune_params *_tunings): Update all tunings (no functional change).
+ (aarch64_override_options_internal): Set PARAM_SIMULTANEOUS_PREFETCHES,
+ PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE, and PARAM_L2_CACHE_SIZE
+ from tunings structures.
+
2017-06-16 Jakub Jelinek <jakub@redhat.com>
PR sanitizer/81094
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index ac91865..bb06139 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -203,6 +203,15 @@ struct cpu_approx_modes
const unsigned int recip_sqrt; /* Reciprocal square root. */
};
+/* Cache prefetch settings for prefetch-loop-arrays. */
+struct cpu_prefetch_tune
+{
+ const int num_slots; /* Value for PARAM_SIMULTANEOUS_PREFETCHES; used only when > 0. */
+ const int l1_cache_size; /* Value for PARAM_L1_CACHE_SIZE; used only when >= 0. */
+ const int l1_cache_line_size; /* Value for PARAM_L1_CACHE_LINE_SIZE; used only when >= 0. */
+ const int l2_cache_size; /* Value for PARAM_L2_CACHE_SIZE; used only when >= 0. */
+};
+
struct tune_params
{
const struct cpu_cost_table *insn_extra_cost;
@@ -224,9 +233,6 @@ struct tune_params
int min_div_recip_mul_df;
/* Value for aarch64_case_values_threshold; or 0 for the default. */
unsigned int max_case_values;
- /* Value for PARAM_L1_CACHE_LINE_SIZE; or 0 to use the default. */
- unsigned int cache_line_size;
-
/* An enum specifying how to take into account CPU autoprefetch capabilities
during instruction scheduling:
- AUTOPREFETCHER_OFF: Do not take autoprefetch capabilities into account.
@@ -244,6 +250,10 @@ struct tune_params
} autoprefetcher_model;
unsigned int extra_tuning_flags;
+
+ /* Place prefetch struct pointer at the end to enable type checking
+ errors when tune_params misses elements (e.g., from erroneous merges). */
+ const struct cpu_prefetch_tune *prefetch;
};
#define AARCH64_FUSION_PAIR(x, name) \
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 239ba72..e3296c0 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -526,6 +526,39 @@ static const cpu_approx_modes xgene1_approx_modes =
AARCH64_APPROX_ALL /* recip_sqrt */
};
+/* Generic prefetch settings: every field holds a value that aarch64_override_options_internal skips, so no prefetch param is overridden. */
+static const cpu_prefetch_tune generic_prefetch_tune =
+{
+ 0, /* num_slots: not > 0, so the default is kept. */
+ -1, /* l1_cache_size: negative, so the default is kept. */
+ -1, /* l1_cache_line_size: negative, so the default is kept. */
+ -1 /* l2_cache_size: negative, so the default is kept. */
+};
+
+static const cpu_prefetch_tune exynosm1_prefetch_tune =
+{
+ 0, /* num_slots: not > 0, so the default is kept. */
+ -1, /* l1_cache_size: negative, so the default is kept. */
+ 64, /* l1_cache_line_size: same 64 as the cache_line_size removed from exynosm1_tunings. */
+ -1 /* l2_cache_size: negative, so the default is kept. */
+};
+
+static const cpu_prefetch_tune qdf24xx_prefetch_tune =
+{
+ 0, /* num_slots: not > 0, so the default is kept. */
+ -1, /* l1_cache_size: negative, so the default is kept. */
+ 64, /* l1_cache_line_size: same 64 as the cache_line_size removed from qdf24xx_tunings. */
+ -1 /* l2_cache_size: negative, so the default is kept. */
+};
+
+static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
+{
+ 0, /* num_slots: not > 0, so the default is kept. */
+ -1, /* l1_cache_size: negative, so the default is kept. */
+ 64, /* l1_cache_line_size: same 64 as the cache_line_size removed from thunderx2t99_tunings. */
+ -1 /* l2_cache_size: negative, so the default is kept. */
+};
+
static const struct tune_params generic_tunings =
{
&cortexa57_extra_costs,
@@ -546,9 +579,9 @@ static const struct tune_params generic_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
- 0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ &generic_prefetch_tune
};
static const struct tune_params cortexa35_tunings =
@@ -572,9 +605,9 @@ static const struct tune_params cortexa35_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
- 0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ &generic_prefetch_tune
};
static const struct tune_params cortexa53_tunings =
@@ -598,9 +631,9 @@ static const struct tune_params cortexa53_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
- 0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ &generic_prefetch_tune
};
static const struct tune_params cortexa57_tunings =
@@ -624,9 +657,9 @@ static const struct tune_params cortexa57_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
- 0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags. */
+ &generic_prefetch_tune
};
static const struct tune_params cortexa72_tunings =
@@ -650,9 +683,9 @@ static const struct tune_params cortexa72_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
- 0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ &generic_prefetch_tune
};
static const struct tune_params cortexa73_tunings =
@@ -676,11 +709,13 @@ static const struct tune_params cortexa73_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
- 0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ &generic_prefetch_tune
};
+
+
static const struct tune_params exynosm1_tunings =
{
&exynosm1_extra_costs,
@@ -701,9 +736,9 @@ static const struct tune_params exynosm1_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
48, /* max_case_values. */
- 64, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ &exynosm1_prefetch_tune
};
static const struct tune_params thunderx_tunings =
@@ -726,9 +761,9 @@ static const struct tune_params thunderx_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
- 0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */
+ &generic_prefetch_tune
};
static const struct tune_params xgene1_tunings =
@@ -751,9 +786,9 @@ static const struct tune_params xgene1_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
- 0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ &generic_prefetch_tune
};
static const struct tune_params qdf24xx_tunings =
@@ -777,9 +812,9 @@ static const struct tune_params qdf24xx_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
- 64, /* cache_line_size. */
tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ &qdf24xx_prefetch_tune
};
static const struct tune_params thunderx2t99_tunings =
@@ -802,9 +837,9 @@ static const struct tune_params thunderx2t99_tunings =
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
- 64, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ &thunderx2t99_prefetch_tune
};
/* Support for fine-grained override of the tuning structures. */
@@ -8747,10 +8782,27 @@ aarch64_override_options_internal (struct gcc_options *opts)
opts->x_param_values,
global_options_set.x_param_values);
- /* Set the L1 cache line size. */
- if (selected_cpu->tune->cache_line_size != 0)
+ /* Set up parameters to be used in prefetching algorithm. Do not
+ override the defaults unless we are tuning for a core we have
+ researched values for. */
+ if (aarch64_tune_params.prefetch->num_slots > 0)
+ maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
+ aarch64_tune_params.prefetch->num_slots,
+ opts->x_param_values,
+ global_options_set.x_param_values);
+ if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
+ maybe_set_param_value (PARAM_L1_CACHE_SIZE,
+ aarch64_tune_params.prefetch->l1_cache_size,
+ opts->x_param_values,
+ global_options_set.x_param_values);
+ if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
- selected_cpu->tune->cache_line_size,
+ aarch64_tune_params.prefetch->l1_cache_line_size,
+ opts->x_param_values,
+ global_options_set.x_param_values);
+ if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
+ maybe_set_param_value (PARAM_L2_CACHE_SIZE,
+ aarch64_tune_params.prefetch->l2_cache_size,
opts->x_param_values,
global_options_set.x_param_values);