[AArch64] Enable CmpBcc fusion for Neoverse-v2 (#90608)

This adds compare-and-branch instruction fusion for Neoverse V2. According to the Software Optimization Guide, the specific AArch64 instruction pairs that can be fused are as follows:

- CMP/CMN (immediate) + B.cond
- CMP/CMN (register) + B.cond

Performance for SPEC2017 is neutral, but another benchmark improves significantly. Results for SPEC2017 on a Neoverse V2:

- 500.perlbench: 0%
- 502.gcc_r: 0%
- 505.mcf_r: -0.15%
- 523.xalancbmk_r: -0.43%
- 525.x264_r: 0%
- 531.deepsjeng_r: 0%
- 541.leela_r: -0.16%
- 557.xz_r: -0.47%
author: Elvina Yakubova <eyakubova@nvidia.com> 2024-06-04 15:09:42 +0100
committer: GitHub <noreply@github.com> 2024-06-04 15:09:42 +0100
commit: fadd1ec536ce76acfd572364b0e118da54116e94 (patch)
tree: 16cd9744e9eea8c206f34b739a38c31c5b012ad3 /llvm
parent: 2635d0419e4800c34c7cfea120a12fec8d4878fe (diff)
download: llvm-fadd1ec536ce76acfd572364b0e118da54116e94.zip
llvm-fadd1ec536ce76acfd572364b0e118da54116e94.tar.gz
llvm-fadd1ec536ce76acfd572364b0e118da54116e94.tar.bz2
2 files changed, 36 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index f2286ae..8d16709 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -484,6 +484,7 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1
 def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2",
                                       "Neoverse V2 ARM processors", [
                                       FeatureFuseAES,
+                                      FeatureCmpBccFusion,
                                       FeatureFuseAdrpAdd,
                                       FeatureALULSLFast,
                                       FeaturePostRAScheduler,
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-cmp-bcc.ll b/llvm/test/CodeGen/AArch64/misched-fusion-cmp-bcc.ll
new file mode 100644
index 0000000..1034e07
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/misched-fusion-cmp-bcc.ll
@@ -0,0 +1,35 @@
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mattr=cmp-bcc-fusion | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a77    | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a78    | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a78ae  | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a78c   | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a710   | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-x715   | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-x720   | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-x720ae | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-x1     | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-x2     | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=neoverse-v2   | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=ampere1       | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=ampere1a      | FileCheck %s
+; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=ampere1b      | FileCheck %s
+
+
+define void @test_cmp_bcc_fusion(i32 %x, i32 %y, i32* %arr) {
+entry:
+  %cmp = icmp eq i32 %x, %y
+  store i32 %x, i32* %arr, align 4
+  br i1 %cmp, label %if_true, label %if_false
+
+if_true:
+  ret void
+
+if_false:
+  ret void
+}
+
+; CHECK-LABEL: test_cmp_bcc_fusion:
+; CHECK: str {{w[0-9]}}, [{{x[0-9]}}]
+; CHECK-NEXT: subs {{w[0-9]}}, {{w[0-9]}}, {{w[0-9]}}
+; CHECK-NEXT: b.ne .LBB0_2
+; CHECK-NEXT: b .LBB0_1
author	Elvina Yakubova <eyakubova@nvidia.com>	2024-06-04 15:09:42 +0100
committer	GitHub <noreply@github.com>	2024-06-04 15:09:42 +0100
commit	fadd1ec536ce76acfd572364b0e118da54116e94 (patch)
tree	16cd9744e9eea8c206f34b739a38c31c5b012ad3 /llvm
parent	2635d0419e4800c34c7cfea120a12fec8d4878fe (diff)
download	llvm-fadd1ec536ce76acfd572364b0e118da54116e94.zip llvm-fadd1ec536ce76acfd572364b0e118da54116e94.tar.gz llvm-fadd1ec536ce76acfd572364b0e118da54116e94.tar.bz2