Add prefetch configuration to aarch64 backend.

* config/aarch64/aarch64-protos.h (struct cpu_prefetch_tune): New tune structure. (struct tune_params): Use cpu_prefetch_tune instead of cache_line_size. [Unrelated to main purpose of the patch] Place the pointer field last to enable type checking errors when tune structures are wrongly merged. * config/aarch64/aarch64.c (generic_prefetch_tune, exynosm1_prefetch_tune, qdf24xx_prefetch_tune, thunderx2t99_prefetch_tune): New tune constants. (tune_params *_tunings): Update all tunings (no functional change). (aarch64_override_options_internal): Set PARAM_SIMULTANEOUS_PREFETCHES, PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE, and PARAM_L2_CACHE_SIZE from tunings structures. Change-Id: I1ddbac1863dcf078a2e5b14dd904debc76a7da94 From-SVN: r249240
author: Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> 2017-06-16 09:30:43 +0000
committer: Maxim Kuvyrkov <mkuvyrkov@gcc.gnu.org> 2017-06-16 09:30:43 +0000
commit: 9d2c6e2eb72324b10160ce1117a8ab600be94bad (patch)
tree: f1b7174e6289a48cb0e9b65a00ac1eca952bec5e /gcc
parent: b783399af92647bd0765b9d51afab04dea31344b (diff)
download: gcc-9d2c6e2eb72324b10160ce1117a8ab600be94bad.zip
gcc-9d2c6e2eb72324b10160ce1117a8ab600be94bad.tar.gz
gcc-9d2c6e2eb72324b10160ce1117a8ab600be94bad.tar.bz2
3 files changed, 105 insertions, 28 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d6f2d72..a78e736 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2017-06-16  Maxim Kuvyrkov  <maxim.kuvyrkov@linaro.org>
+
+        * config/aarch64/aarch64-protos.h (struct cpu_prefetch_tune):
+        New tune structure.
+        (struct tune_params): Use cpu_prefetch_tune instead of cache_line_size.
+        [Unrelated to main purpose of the patch] Place the pointer field last
+        to enable type checking errors when tune structures are wrongly merged.
+        * config/aarch64/aarch64.c (generic_prefetch_tune)
+        (exynosm1_prefetch_tune, qdf24xx_prefetch_tune)
+        (thunderx2t99_prefetch_tune): New tune constants.
+        (tune_params *_tunings): Update all tunings (no functional change).
+        (aarch64_override_options_internal): Set PARAM_SIMULTANEOUS_PREFETCHES,
+        PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE, and PARAM_L2_CACHE_SIZE
+        from tunings structures.
+
 2017-06-16  Jakub Jelinek  <jakub@redhat.com>
 
 	PR sanitizer/81094
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index ac91865..bb06139 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -203,6 +203,15 @@ struct cpu_approx_modes
   const unsigned int recip_sqrt;	/* Reciprocal square root.  */
 };
 
+/* Cache prefetch settings for prefetch-loop-arrays.  */
+struct cpu_prefetch_tune
+{
+  const int num_slots;		/* Sets PARAM_SIMULTANEOUS_PREFETCHES if > 0.  */
+  const int l1_cache_size;	/* Sets PARAM_L1_CACHE_SIZE if >= 0.  */
+  const int l1_cache_line_size;	/* Sets PARAM_L1_CACHE_LINE_SIZE if >= 0.  */
+  const int l2_cache_size;	/* Sets PARAM_L2_CACHE_SIZE if >= 0.  */
+};
+
 struct tune_params
 {
   const struct cpu_cost_table *insn_extra_cost;
@@ -224,9 +233,6 @@ struct tune_params
   int min_div_recip_mul_df;
   /* Value for aarch64_case_values_threshold; or 0 for the default.  */
   unsigned int max_case_values;
-  /* Value for PARAM_L1_CACHE_LINE_SIZE; or 0 to use the default.  */
-  unsigned int cache_line_size;
-
 /* An enum specifying how to take into account CPU autoprefetch capabilities
    during instruction scheduling:
    - AUTOPREFETCHER_OFF: Do not take autoprefetch capabilities into account.
@@ -244,6 +250,10 @@ struct tune_params
   } autoprefetcher_model;
 
   unsigned int extra_tuning_flags;
+
+  /* Place prefetch struct pointer at the end to enable type checking
+     errors when tune_params misses elements (e.g., from erroneous merges).  */
+  const struct cpu_prefetch_tune *prefetch;
 };
 
 #define AARCH64_FUSION_PAIR(x, name) \
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 239ba72..e3296c0 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -526,6 +526,39 @@ static const cpu_approx_modes xgene1_approx_modes =
   AARCH64_APPROX_ALL	/* recip_sqrt  */
 };
 
+/* Generic prefetch settings (which disable prefetch).  */
+static const cpu_prefetch_tune generic_prefetch_tune =
+{
+  0,			/* num_slots: 0 leaves the default param  */
+  -1,			/* l1_cache_size: negative leaves the default  */
+  -1,			/* l1_cache_line_size: negative leaves the default  */
+  -1			/* l2_cache_size: negative leaves the default  */
+};
+
+static const cpu_prefetch_tune exynosm1_prefetch_tune =
+{
+  0,			/* num_slots: 0 leaves the default param  */
+  -1,			/* l1_cache_size: negative leaves the default  */
+  64,			/* l1_cache_line_size: sets PARAM_L1_CACHE_LINE_SIZE  */
+  -1			/* l2_cache_size: negative leaves the default  */
+};
+
+static const cpu_prefetch_tune qdf24xx_prefetch_tune =
+{
+  0,			/* num_slots: 0 leaves the default param  */
+  -1,			/* l1_cache_size: negative leaves the default  */
+  64,			/* l1_cache_line_size: sets PARAM_L1_CACHE_LINE_SIZE  */
+  -1			/* l2_cache_size: negative leaves the default  */
+};
+
+static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
+{
+  0,			/* num_slots: 0 leaves the default param  */
+  -1,			/* l1_cache_size: negative leaves the default  */
+  64,			/* l1_cache_line_size: sets PARAM_L1_CACHE_LINE_SIZE  */
+  -1			/* l2_cache_size: negative leaves the default  */
+};
+
 static const struct tune_params generic_tunings =
 {
   &cortexa57_extra_costs,
@@ -546,9 +579,9 @@ static const struct tune_params generic_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  &generic_prefetch_tune
 };
 
 static const struct tune_params cortexa35_tunings =
@@ -572,9 +605,9 @@ static const struct tune_params cortexa35_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  &generic_prefetch_tune
 };
 
 static const struct tune_params cortexa53_tunings =
@@ -598,9 +631,9 @@ static const struct tune_params cortexa53_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  &generic_prefetch_tune
 };
 
 static const struct tune_params cortexa57_tunings =
@@ -624,9 +657,9 @@ static const struct tune_params cortexa57_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS),	/* tune_flags.  */
+  &generic_prefetch_tune
 };
 
 static const struct tune_params cortexa72_tunings =
@@ -650,9 +683,9 @@ static const struct tune_params cortexa72_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  &generic_prefetch_tune
 };
 
 static const struct tune_params cortexa73_tunings =
@@ -676,11 +709,13 @@ static const struct tune_params cortexa73_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  &generic_prefetch_tune
 };
 
+
+
 static const struct tune_params exynosm1_tunings =
 {
   &exynosm1_extra_costs,
@@ -701,9 +736,9 @@ static const struct tune_params exynosm1_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   48,	/* max_case_values.  */
-  64,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE), /* tune_flags.  */
+  &exynosm1_prefetch_tune
 };
 
 static const struct tune_params thunderx_tunings =
@@ -726,9 +761,9 @@ static const struct tune_params thunderx_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW),	/* tune_flags.  */
+  &generic_prefetch_tune
 };
 
 static const struct tune_params xgene1_tunings =
@@ -751,9 +786,9 @@ static const struct tune_params xgene1_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  &generic_prefetch_tune
 };
 
 static const struct tune_params qdf24xx_tunings =
@@ -777,9 +812,9 @@ static const struct tune_params qdf24xx_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  64,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_STRONG,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE)		/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE),		/* tune_flags.  */
+  &qdf24xx_prefetch_tune
 };
 
 static const struct tune_params thunderx2t99_tunings =
@@ -802,9 +837,9 @@ static const struct tune_params thunderx2t99_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  64,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  &thunderx2t99_prefetch_tune
 };
 
 /* Support for fine-grained override of the tuning structures.  */
@@ -8747,10 +8782,27 @@ aarch64_override_options_internal (struct gcc_options *opts)
 			 opts->x_param_values,
 			 global_options_set.x_param_values);
 
-  /* Set the L1 cache line size.  */
-  if (selected_cpu->tune->cache_line_size != 0)
+  /* Set up parameters to be used in prefetching algorithm.  Do not
+     override the defaults unless we are tuning for a core we have
+     researched values for.  */
+  if (aarch64_tune_params.prefetch->num_slots > 0)
+    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
+			   aarch64_tune_params.prefetch->num_slots,
+			   opts->x_param_values,
+			   global_options_set.x_param_values);
+  if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
+    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
+			   aarch64_tune_params.prefetch->l1_cache_size,
+			   opts->x_param_values,
+			   global_options_set.x_param_values);
+  if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
     maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
-			   selected_cpu->tune->cache_line_size,
+			   aarch64_tune_params.prefetch->l1_cache_line_size,
+			   opts->x_param_values,
+			   global_options_set.x_param_values);
+  if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
+    maybe_set_param_value (PARAM_L2_CACHE_SIZE,
+			   aarch64_tune_params.prefetch->l2_cache_size,
 			   opts->x_param_values,
 			   global_options_set.x_param_values);
author	Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>	2017-06-16 09:30:43 +0000
committer	Maxim Kuvyrkov <mkuvyrkov@gcc.gnu.org>	2017-06-16 09:30:43 +0000
commit	9d2c6e2eb72324b10160ce1117a8ab600be94bad (patch)
tree	f1b7174e6289a48cb0e9b65a00ac1eca952bec5e /gcc
parent	b783399af92647bd0765b9d51afab04dea31344b (diff)
download	gcc-9d2c6e2eb72324b10160ce1117a8ab600be94bad.zip gcc-9d2c6e2eb72324b10160ce1117a8ab600be94bad.tar.gz gcc-9d2c6e2eb72324b10160ce1117a8ab600be94bad.tar.bz2