diff options
author | Elvina Yakubova <eyakubova@nvidia.com> | 2024-06-04 15:09:42 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-04 15:09:42 +0100 |
commit | fadd1ec536ce76acfd572364b0e118da54116e94 (patch) | |
tree | 16cd9744e9eea8c206f34b739a38c31c5b012ad3 /llvm | |
parent | 2635d0419e4800c34c7cfea120a12fec8d4878fe (diff) | |
download | llvm-fadd1ec536ce76acfd572364b0e118da54116e94.zip llvm-fadd1ec536ce76acfd572364b0e118da54116e94.tar.gz llvm-fadd1ec536ce76acfd572364b0e118da54116e94.tar.bz2 |
[AArch64] Enable CmpBcc fusion for Neoverse-v2 (#90608)
This adds compare-and-branch instruction fusion for Neoverse V2.
According to the Software Optimization Guide:
Specific AArch64 instruction pairs that can be fused are as follows:
CMP/CMN (immediate) + B.cond
CMP/CMN (register) + B.cond
Performance for SPEC2017 is neutral, but another benchmark improves
significantly.
Results for SPEC2017 on a Neoverse V2:
500.perlbench 0%
502.gcc_r 0%
505.mcf_r -0.15%
523.xalancbmk_r -0.43%
525.x264_r 0%
531.deepsjeng_r 0%
541.leela_r -0.16%
557.xz_r -0.47%
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64Processors.td | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/misched-fusion-cmp-bcc.ll | 35 |
2 files changed, 36 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index f2286ae..8d16709 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -484,6 +484,7 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1 def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2", "Neoverse V2 ARM processors", [ FeatureFuseAES, + FeatureCmpBccFusion, FeatureFuseAdrpAdd, FeatureALULSLFast, FeaturePostRAScheduler, diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-cmp-bcc.ll b/llvm/test/CodeGen/AArch64/misched-fusion-cmp-bcc.ll new file mode 100644 index 0000000..1034e07 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/misched-fusion-cmp-bcc.ll @@ -0,0 +1,35 @@ +; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mattr=cmp-bcc-fusion | FileCheck %s +; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a77 | FileCheck %s +; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a78 | FileCheck %s +; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a78ae | FileCheck %s +; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a78c | FileCheck %s +; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a710 | FileCheck %s +; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a715 | FileCheck %s +; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a720 | FileCheck %s +; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-a720ae | FileCheck %s +; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-x1 | FileCheck %s +; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=cortex-x2 | FileCheck %s +; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=neoverse-v2 | FileCheck %s +; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=ampere1 | FileCheck %s +; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown -mcpu=ampere1a | FileCheck %s +; RUN: llc %s -o - -O0 -mtriple=aarch64-unknown 
-mcpu=ampere1b | FileCheck %s + + +define void @test_cmp_bcc_fusion(i32 %x, i32 %y, i32* %arr) { +entry: + %cmp = icmp eq i32 %x, %y + store i32 %x, i32* %arr, align 4 + br i1 %cmp, label %if_true, label %if_false + +if_true: + ret void + +if_false: + ret void +} + +; CHECK-LABEL: test_cmp_bcc_fusion: +; CHECK: str {{w[0-9]}}, [{{x[0-9]}}] +; CHECK-NEXT: subs {{w[0-9]}}, {{w[0-9]}}, {{w[0-9]}} +; CHECK-NEXT: b.ne .LBB0_2 +; CHECK-NEXT: b .LBB0_1 |