aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJennifer Schmitz <jschmitz@nvidia.com>2024-07-22 23:24:45 -0700
committerKyrylo Tkachov <ktkachov@nvidia.com>2024-07-24 17:07:25 +0530
commit4c5eb66e701bc9f3bf1298269f52559b10d63a09 (patch)
treeef7487fdd91041b38df1819fabaf201368e3f542
parent4efe43a61334e231bcd3cf9150cd844dbdf9ed20 (diff)
downloadgcc-4c5eb66e701bc9f3bf1298269f52559b10d63a09.zip
gcc-4c5eb66e701bc9f3bf1298269f52559b10d63a09.tar.gz
gcc-4c5eb66e701bc9f3bf1298269f52559b10d63a09.tar.bz2
aarch64: Fuse CMP+CSEL and CMP+CSET for -mcpu=neoverse-v2
According to the Neoverse V2 Software Optimization Guide (section 4.14), the
instruction pairs CMP+CSEL and CMP+CSET can be fused, which had not been
implemented so far. This patch implements and tests the two fusion pairs.

The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
There was also no non-noise impact on SPEC CPU2017 benchmark.
OK for mainline?

Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>

gcc/

	* config/aarch64/aarch64.cc (aarch_macro_fusion_pair_p): Implement
	fusion logic.
	* config/aarch64/aarch64-fusion-pairs.def (cmp+csel): New entry.
	(cmp+cset): Likewise.
	* config/aarch64/tuning_models/neoversev2.h: Enable logic in
	field fusible_ops.

gcc/testsuite/

	* gcc.target/aarch64/cmp_csel_fuse.c: New test.
	* gcc.target/aarch64/cmp_cset_fuse.c: Likewise.
-rw-r--r--gcc/config/aarch64/aarch64-fusion-pairs.def2
-rw-r--r--gcc/config/aarch64/aarch64.cc19
-rw-r--r--gcc/config/aarch64/tuning_models/neoversev2.h5
-rw-r--r--gcc/testsuite/gcc.target/aarch64/cmp_csel_fuse.c34
-rw-r--r--gcc/testsuite/gcc.target/aarch64/cmp_cset_fuse.c31
5 files changed, 90 insertions, 1 deletions
diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def
index 9a43b0c..bf5e85b 100644
--- a/gcc/config/aarch64/aarch64-fusion-pairs.def
+++ b/gcc/config/aarch64/aarch64-fusion-pairs.def
@@ -37,5 +37,7 @@ AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC)
AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH)
AARCH64_FUSION_PAIR ("alu+cbz", ALU_CBZ)
AARCH64_FUSION_PAIR ("addsub_2reg_const1", ADDSUB_2REG_CONST1)
+AARCH64_FUSION_PAIR ("cmp+csel", CMP_CSEL)
+AARCH64_FUSION_PAIR ("cmp+cset", CMP_CSET)
#undef AARCH64_FUSION_PAIR
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 9e51236..db598eb 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -27348,6 +27348,25 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
&& reg_referenced_p (SET_DEST (prev_set), PATTERN (curr)))
return true;
+ /* Fuse CMP and CSEL. */
+ if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_CSEL)
+ && prev_set && curr_set
+ && GET_CODE (SET_SRC (prev_set)) == COMPARE
+ && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
+ && REG_P (XEXP (SET_SRC (curr_set), 1))
+ && REG_P (XEXP (SET_SRC (curr_set), 2))
+ && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr)))
+ return true;
+
+ /* Fuse CMP and CSET. */
+ if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_CSET)
+ && prev_set && curr_set
+ && GET_CODE (SET_SRC (prev_set)) == COMPARE
+ && GET_RTX_CLASS (GET_CODE (SET_SRC (curr_set))) == RTX_COMPARE
+ && REG_P (SET_DEST (curr_set))
+ && reg_referenced_p (SET_DEST (prev_set), PATTERN (curr)))
+ return true;
+
/* Fuse flag-setting ALU instructions and conditional branch. */
if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
&& any_condjump_p (curr))
diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
index f76e4ef..ae99fab 100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -221,7 +221,10 @@ static const struct tune_params neoversev2_tunings =
2 /* store_pred. */
}, /* memmov_cost. */
5, /* issue_rate */
- (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
+ (AARCH64_FUSE_AES_AESMC
+ | AARCH64_FUSE_CMP_BRANCH
+ | AARCH64_FUSE_CMP_CSEL
+ | AARCH64_FUSE_CMP_CSET), /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/testsuite/gcc.target/aarch64/cmp_csel_fuse.c b/gcc/testsuite/gcc.target/aarch64/cmp_csel_fuse.c
new file mode 100644
index 0000000..f5e511e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/cmp_csel_fuse.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** f1:
+** ...
+** cmp w[0-9]+, w[0-9]+
+** csel w[0-9]+, w[0-9]+, w[0-9]+, le
+** ret
+*/
+int f1 (int a, int b, int c) /* 32-bit case: pick one of two computed values depending on a > b.  */
+{
+ int cmp = a > b; /* The comparison; the expected body above shows it as a `cmp` of two w registers.  */
+ int add1 = c + 3; /* Both arms are computed from c; NOTE(review): presumably so both csel inputs live in registers -- confirm.  */
+ int add2 = c + 8;
+ return cmp ? add1 : add2; /* The expected body lists `csel ..., le` on the line right after `cmp`, then `ret`.  */
+}
+
+/*
+** f2:
+** ...
+** cmp x[0-9]+, x[0-9]+
+** csel x[0-9]+, x[0-9]+, x[0-9]+, le
+** ret
+*/
+long long f2 (long long a, long long b, long long c) /* 64-bit case: same shape as f1, using long long operands.  */
+{
+ long long cmp = a > b; /* The comparison; the expected body above shows it as a `cmp` of two x registers.  */
+ long long add1 = c + 3; /* Both arms are computed from c; NOTE(review): presumably so both csel inputs live in registers -- confirm.  */
+ long long add2 = c + 8;
+ return cmp ? add1 : add2; /* The expected body lists `csel ..., le` on the line right after `cmp`, then `ret`.  */
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/cmp_cset_fuse.c b/gcc/testsuite/gcc.target/aarch64/cmp_cset_fuse.c
new file mode 100644
index 0000000..04f1ce2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/cmp_cset_fuse.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** f1:
+** cmp w[0-9]+, w[0-9]+
+** cset w[0-9]+, gt
+** ...
+*/
+int g; /* Global that f1 stores cmp + 1 into.  */
+int f1 (int a, int b) /* 32-bit case: return a > b as an integer and also store a value derived from it.  */
+{
+ int cmp = a > b; /* The expected body above starts with `cmp` of two w registers followed by `cset ..., gt`.  */
+ g = cmp + 1; /* Second use of cmp, written to the global; NOTE(review): presumably extra work that could otherwise be scheduled between cmp and cset -- confirm.  */
+ return cmp;
+}
+
+/*
+** f2:
+** cmp x[0-9]+, x[0-9]+
+** cset x[0-9]+, gt
+** ...
+*/
+long long h; /* Global that f2 stores cmp + 1 into.  */
+long long f2 (long long a, long long b) /* 64-bit case: same shape as f1, using long long operands.  */
+{
+ long long cmp = a > b; /* The expected body above starts with `cmp` of two x registers followed by `cset ..., gt`.  */
+ h = cmp + 1; /* Second use of cmp, written to the global; NOTE(review): presumably extra work that could otherwise be scheduled between cmp and cset -- confirm.  */
+ return cmp;
+}