diff options
author | Jennifer Schmitz <jschmitz@nvidia.com> | 2024-08-02 15:58:32 +0100 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@arm.com> | 2024-08-02 15:58:32 +0100 |
commit | 884846351c74dc79ab143a06c25f00fc7c9e3cfb (patch) | |
tree | 914cf1c262fe0325ebec486ea495483d59fa6b80 /gcc/config/aarch64 | |
parent | ba730fd10934e4ca004251aa3748bf9da4d35e62 (diff) | |
download | gcc-884846351c74dc79ab143a06c25f00fc7c9e3cfb.zip gcc-884846351c74dc79ab143a06c25f00fc7c9e3cfb.tar.gz gcc-884846351c74dc79ab143a06c25f00fc7c9e3cfb.tar.bz2 |
AArch64: Fuse CMP+CSEL and CMP+CSET for -mcpu=neoverse-v2
According to the Neoverse V2 Software Optimization Guide (section 4.14), the
instruction pairs CMP+CSEL and CMP+CSET can be fused, but this fusion had not
been implemented so far. This patch implements and tests the two fusion pairs.
The patch was bootstrapped and regtested on aarch64-linux-gnu with no regressions.
There was also no impact beyond noise on the SPEC CPU2017 benchmark.
OK for mainline?
Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>
gcc/
* config/aarch64/aarch64.cc (aarch_macro_fusion_pair_p): Implement
fusion logic.
* config/aarch64/aarch64-fusion-pairs.def (cmp+csel): New entry.
(cmp+cset): Likewise.
* config/aarch64/tuning_models/neoversev2.h: Enable logic in
field fusible_ops.
gcc/testsuite/
* gcc.target/aarch64/fuse_cmp_csel.c: New test.
* gcc.target/aarch64/fuse_cmp_cset.c: Likewise.
Diffstat (limited to 'gcc/config/aarch64')
-rw-r--r-- | gcc/config/aarch64/aarch64-fusion-pairs.def | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.cc | 20 | ||||
-rw-r--r-- | gcc/config/aarch64/tuning_models/neoversev2.h | 5 |
3 files changed, 26 insertions, 1 deletion
diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def index 9a43b0c..bf5e85b 100644 --- a/gcc/config/aarch64/aarch64-fusion-pairs.def +++ b/gcc/config/aarch64/aarch64-fusion-pairs.def @@ -37,5 +37,7 @@ AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC) AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH) AARCH64_FUSION_PAIR ("alu+cbz", ALU_CBZ) AARCH64_FUSION_PAIR ("addsub_2reg_const1", ADDSUB_2REG_CONST1) +AARCH64_FUSION_PAIR ("cmp+csel", CMP_CSEL) +AARCH64_FUSION_PAIR ("cmp+cset", CMP_CSET) #undef AARCH64_FUSION_PAIR diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 113ebb4..9e12bd9 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -27357,6 +27357,26 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr))) return true; + /* Fuse CMP and CSEL/CSET. */ + if (prev_set && curr_set + && GET_CODE (SET_SRC (prev_set)) == COMPARE + && SCALAR_INT_MODE_P (GET_MODE (XEXP (SET_SRC (prev_set), 0))) + && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr))) + { + enum attr_type prev_type = get_attr_type (prev); + if ((prev_type == TYPE_ALUS_SREG || prev_type == TYPE_ALUS_IMM) + && ((aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_CSEL) + && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE + && aarch64_reg_or_zero (XEXP (SET_SRC (curr_set), 1), VOIDmode) + && aarch64_reg_or_zero (XEXP (SET_SRC (curr_set), 2), VOIDmode) + && SCALAR_INT_MODE_P (GET_MODE (XEXP (SET_SRC (curr_set), 1)))) + || (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_CSET) + && GET_RTX_CLASS (GET_CODE (SET_SRC (curr_set))) + == RTX_COMPARE + && REG_P (SET_DEST (curr_set))))) + return true; + } + /* Fuse flag-setting ALU instructions and conditional branch. 
*/ if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH) && any_condjump_p (curr)) diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h index c9c3019..bd259a3 100644 --- a/gcc/config/aarch64/tuning_models/neoversev2.h +++ b/gcc/config/aarch64/tuning_models/neoversev2.h @@ -221,7 +221,10 @@ static const struct tune_params neoversev2_tunings = 2 /* store_pred. */ }, /* memmov_cost. */ 5, /* issue_rate */ - (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */ + (AARCH64_FUSE_AES_AESMC + | AARCH64_FUSE_CMP_BRANCH + | AARCH64_FUSE_CMP_CSEL + | AARCH64_FUSE_CMP_CSET), /* fusible_ops */ "32:16", /* function_align. */ "4", /* jump_align. */ "32:16", /* loop_align. */ |