diff options
author | Di Zhao <dizhao@os.amperecomputing.com> | 2024-01-02 12:35:03 +0800 |
---|---|---|
committer | Di Zhao <dizhao@os.amperecomputing.com> | 2024-01-02 12:35:03 +0800 |
commit | b041bd4ec2cff7b6cfa0b27fc631cba8a02975e4 (patch) | |
tree | a692aaaafdca742d40a36d16ad4cd59fd458428e | |
parent | 6be6305fb6f1a0bf0c088302cc57cbd8aa411873 (diff) | |
download | gcc-b041bd4ec2cff7b6cfa0b27fc631cba8a02975e4.zip gcc-b041bd4ec2cff7b6cfa0b27fc631cba8a02975e4.tar.gz gcc-b041bd4ec2cff7b6cfa0b27fc631cba8a02975e4.tar.bz2 |
aarch64: add 'AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA'
This patch adds a new tuning option
'AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA', to consider fully
pipelined FMAs in reassociation. Also, set this option by default
for Ampere CPUs.
gcc/ChangeLog:
* config/aarch64/aarch64-tuning-flags.def
(AARCH64_EXTRA_TUNING_OPTION): New tuning option
AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA.
* config/aarch64/aarch64.cc
(aarch64_override_options_internal): Set
param_fully_pipelined_fma according to tuning option.
* config/aarch64/tuning_models/ampere1.h: Add
AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA to tune_flags.
* config/aarch64/tuning_models/ampere1a.h: Likewise.
* config/aarch64/tuning_models/ampere1b.h: Likewise.
-rw-r--r-- | gcc/config/aarch64/aarch64-tuning-flags.def | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.cc | 6 | ||||
-rw-r--r-- | gcc/config/aarch64/tuning_models/ampere1.h | 3 | ||||
-rw-r--r-- | gcc/config/aarch64/tuning_models/ampere1a.h | 3 | ||||
-rw-r--r-- | gcc/config/aarch64/tuning_models/ampere1b.h | 5 |
5 files changed, 15 insertions, 4 deletions
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index f28a738..1488a84 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -49,4 +49,6 @@ AARCH64_EXTRA_TUNING_OPTION ("matched_vector_throughput", MATCHED_VECTOR_THROUGH
 
 AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA)
 
+AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_fma", FULLY_PIPELINED_FMA)
+
 #undef AARCH64_EXTRA_TUNING_OPTION
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 9858de6..298477d 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -18321,6 +18321,12 @@ aarch64_override_options_internal (struct gcc_options *opts)
     SET_OPTION_IF_UNSET (opts, &global_options_set, param_avoid_fma_max_bits,
                          512);
 
+  /* Consider fully pipelined FMA in reassociation. */
+  if (aarch64_tune_params.extra_tuning_flags
+      & AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA)
+    SET_OPTION_IF_UNSET (opts, &global_options_set, param_fully_pipelined_fma,
+                         1);
+
   aarch64_override_options_after_change_1 (opts);
 }
 
diff --git a/gcc/config/aarch64/tuning_models/ampere1.h b/gcc/config/aarch64/tuning_models/ampere1.h
index a144e8f..ac215d3 100644
--- a/gcc/config/aarch64/tuning_models/ampere1.h
+++ b/gcc/config/aarch64/tuning_models/ampere1.h
@@ -104,7 +104,8 @@ static const struct tune_params ampere1_tunings =
   2, /* min_div_recip_mul_df. */
   0, /* max_case_values. */
   tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
-  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */
+  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA
+   | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags. */
   &ampere1_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */
   AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/ampere1a.h b/gcc/config/aarch64/tuning_models/ampere1a.h
index f688ed0..0024960 100644
--- a/gcc/config/aarch64/tuning_models/ampere1a.h
+++ b/gcc/config/aarch64/tuning_models/ampere1a.h
@@ -56,7 +56,8 @@ static const struct tune_params ampere1a_tunings =
   2, /* min_div_recip_mul_df. */
   0, /* max_case_values. */
   tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
-  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */
+  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA
+   | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags. */
   &ampere1_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */
   AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/ampere1b.h b/gcc/config/aarch64/tuning_models/ampere1b.h
index a98b6a9..15cc896 100644
--- a/gcc/config/aarch64/tuning_models/ampere1b.h
+++ b/gcc/config/aarch64/tuning_models/ampere1b.h
@@ -105,8 +105,9 @@ static const struct tune_params ampere1b_tunings =
   2, /* min_div_recip_mul_df. */
   0, /* max_case_values. */
   tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */
-  (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND |
-   AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */
+  (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
+   | AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA
+   | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags. */
   &ampere1b_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */
   AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */