aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Di Zhao <dizhao@os.amperecomputing.com> 2023-12-01 16:16:40 +0800
committer: Di Zhao <dizhao@os.amperecomputing.com> 2023-12-01 17:02:46 +0800
commit: 6563d6767ed0f702458f8975cd5d73676c4604cc (patch)
tree: 6f9fabbd0e345e03b15cc84b19096552a4b82c98 /gcc
parent: 82288550012c9382ddfd75a0b4b1bb467d445744 (diff)
download: gcc-6563d6767ed0f702458f8975cd5d73676c4604cc.zip
gcc-6563d6767ed0f702458f8975cd5d73676c4604cc.tar.gz
gcc-6563d6767ed0f702458f8975cd5d73676c4604cc.tar.bz2
aarch64: modify Ampere CPU tunings on reassociation/FMA
1. Allow reassociation on FP additions.
2. Avoid generating loop-dependent FMA chains. Added a tuning option
   'AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA' for this.

gcc/ChangeLog:

	* config/aarch64/aarch64-tuning-flags.def
	(AARCH64_EXTRA_TUNING_OPTION): New tuning option to avoid
	cross-loop FMA.
	* config/aarch64/aarch64.cc
	(aarch64_override_options_internal): Set
	param_avoid_fma_max_bits according to tuning option.
	* config/aarch64/tuning_models/ampere1.h (ampere1_tunings):
	Modify tunings related to FMA.
	* config/aarch64/tuning_models/ampere1a.h (ampere1a_tunings):
	Likewise.
	* config/aarch64/tuning_models/ampere1b.h (ampere1b_tunings):
	Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/aarch64/aarch64-tuning-flags.def | 2
-rw-r--r-- gcc/config/aarch64/aarch64.cc | 6
-rw-r--r-- gcc/config/aarch64/tuning_models/ampere1.h | 2
-rw-r--r-- gcc/config/aarch64/tuning_models/ampere1a.h | 4
-rw-r--r-- gcc/config/aarch64/tuning_models/ampere1b.h | 5
5 files changed, 14 insertions, 5 deletions
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index 774568e..f28a738 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -47,4 +47,6 @@ AARCH64_EXTRA_TUNING_OPTION ("use_new_vector_costs", USE_NEW_VECTOR_COSTS)
AARCH64_EXTRA_TUNING_OPTION ("matched_vector_throughput", MATCHED_VECTOR_THROUGHPUT)
+AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA)
+
#undef AARCH64_EXTRA_TUNING_OPTION
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index bde21f7..0f83ec0 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -16083,6 +16083,12 @@ aarch64_override_options_internal (struct gcc_options *opts)
&& opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
opts->x_flag_prefetch_loop_arrays = 1;
+ /* Avoid loop-dependant FMA chains. */
+ if (aarch64_tune_params.extra_tuning_flags
+ & AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA)
+ SET_OPTION_IF_UNSET (opts, &global_options_set, param_avoid_fma_max_bits,
+ 512);
+
aarch64_override_options_after_change_1 (opts);
}
diff --git a/gcc/config/aarch64/tuning_models/ampere1.h b/gcc/config/aarch64/tuning_models/ampere1.h
index 8d2a1c6..a144e8f 100644
--- a/gcc/config/aarch64/tuning_models/ampere1.h
+++ b/gcc/config/aarch64/tuning_models/ampere1.h
@@ -104,7 +104,7 @@ static const struct tune_params ampere1_tunings =
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */
&ampere1_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/ampere1a.h b/gcc/config/aarch64/tuning_models/ampere1a.h
index c419ffb..f688ed0 100644
--- a/gcc/config/aarch64/tuning_models/ampere1a.h
+++ b/gcc/config/aarch64/tuning_models/ampere1a.h
@@ -50,13 +50,13 @@ static const struct tune_params ampere1a_tunings =
"32:16", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
- 1, /* fma_reassoc_width. */
+ 4, /* fma_reassoc_width. */
2, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */
&ampere1_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/ampere1b.h b/gcc/config/aarch64/tuning_models/ampere1b.h
index c4928f5..a98b6a9 100644
--- a/gcc/config/aarch64/tuning_models/ampere1b.h
+++ b/gcc/config/aarch64/tuning_models/ampere1b.h
@@ -99,13 +99,14 @@ static const struct tune_params ampere1b_tunings =
"32:16", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
- 1, /* fma_reassoc_width. */
+ 4, /* fma_reassoc_width. */
2, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND |
+ AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */
&ampere1b_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */