about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Di Zhao <dizhao@os.amperecomputing.com> 2024-01-02 12:35:03 +0800
committer: Di Zhao <dizhao@os.amperecomputing.com> 2024-01-02 12:35:03 +0800
commit: b041bd4ec2cff7b6cfa0b27fc631cba8a02975e4 (patch)
tree: a692aaaafdca742d40a36d16ad4cd59fd458428e
parent: 6be6305fb6f1a0bf0c088302cc57cbd8aa411873 (diff)
download: gcc-b041bd4ec2cff7b6cfa0b27fc631cba8a02975e4.zip
gcc-b041bd4ec2cff7b6cfa0b27fc631cba8a02975e4.tar.gz
gcc-b041bd4ec2cff7b6cfa0b27fc631cba8a02975e4.tar.bz2
aarch64: add 'AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA'
This patch adds a new tuning option
'AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA', to consider fully
pipelined FMAs in reassociation. Also, set this option by default
for Ampere CPUs.

gcc/ChangeLog:

	* config/aarch64/aarch64-tuning-flags.def
	(AARCH64_EXTRA_TUNING_OPTION): New tuning option
	AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA.
	* config/aarch64/aarch64.cc
	(aarch64_override_options_internal): Set
	param_fully_pipelined_fma according to tuning option.
	* config/aarch64/tuning_models/ampere1.h: Add
	AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA to tune_flags.
	* config/aarch64/tuning_models/ampere1a.h: Likewise.
	* config/aarch64/tuning_models/ampere1b.h: Likewise.
-rw-r--r--  gcc/config/aarch64/aarch64-tuning-flags.def  2
-rw-r--r--  gcc/config/aarch64/aarch64.cc  6
-rw-r--r--  gcc/config/aarch64/tuning_models/ampere1.h  3
-rw-r--r--  gcc/config/aarch64/tuning_models/ampere1a.h  3
-rw-r--r--  gcc/config/aarch64/tuning_models/ampere1b.h  5
5 files changed, 15 insertions, 4 deletions
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index f28a738..1488a84 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -49,4 +49,6 @@ AARCH64_EXTRA_TUNING_OPTION ("matched_vector_throughput", MATCHED_VECTOR_THROUGH
AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA)
+AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_fma", FULLY_PIPELINED_FMA)
+
#undef AARCH64_EXTRA_TUNING_OPTION
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 9858de6..298477d 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -18321,6 +18321,12 @@ aarch64_override_options_internal (struct gcc_options *opts)
SET_OPTION_IF_UNSET (opts, &global_options_set, param_avoid_fma_max_bits,
512);
+ /* Consider fully pipelined FMA in reassociation. */
+ if (aarch64_tune_params.extra_tuning_flags
+ & AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA)
+ SET_OPTION_IF_UNSET (opts, &global_options_set, param_fully_pipelined_fma,
+ 1);
+
aarch64_override_options_after_change_1 (opts);
}
diff --git a/gcc/config/aarch64/tuning_models/ampere1.h b/gcc/config/aarch64/tuning_models/ampere1.h
index a144e8f..ac215d3 100644
--- a/gcc/config/aarch64/tuning_models/ampere1.h
+++ b/gcc/config/aarch64/tuning_models/ampere1.h
@@ -104,7 +104,8 @@ static const struct tune_params ampere1_tunings =
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA
+ | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags. */
&ampere1_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/ampere1a.h b/gcc/config/aarch64/tuning_models/ampere1a.h
index f688ed0..0024960 100644
--- a/gcc/config/aarch64/tuning_models/ampere1a.h
+++ b/gcc/config/aarch64/tuning_models/ampere1a.h
@@ -56,7 +56,8 @@ static const struct tune_params ampere1a_tunings =
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA
+ | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags. */
&ampere1_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/ampere1b.h b/gcc/config/aarch64/tuning_models/ampere1b.h
index a98b6a9..15cc896 100644
--- a/gcc/config/aarch64/tuning_models/ampere1b.h
+++ b/gcc/config/aarch64/tuning_models/ampere1b.h
@@ -105,8 +105,9 @@ static const struct tune_params ampere1b_tunings =
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND |
- AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
+ | AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA
+ | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags. */
&ampere1b_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */