diff options
author | Wilco Dijkstra <wdijkstr@arm.com> | 2016-02-10 12:52:23 +0000 |
---|---|---|
committer | Wilco Dijkstra <wilco@gcc.gnu.org> | 2016-02-10 12:52:23 +0000 |
commit | 00a8574af2b7df89e82dab3c415de8b7cb48a333 (patch) | |
tree | 4c879ecb760ddc8f8f42a294ede6907a665ed74a /gcc/config/aarch64 | |
parent | 24a179f835ec7ce4376071d3744ecd0083841ae7 (diff) | |
download | gcc-00a8574af2b7df89e82dab3c415de8b7cb48a333.zip gcc-00a8574af2b7df89e82dab3c415de8b7cb48a333.tar.gz gcc-00a8574af2b7df89e82dab3c415de8b7cb48a333.tar.bz2 |
Enable instruction fusion of dependent AESE; AESMC and AESD; AESIMC pairs.
This can give up to 2x speedup on many AArch64 implementations. Also model
the crypto instructions on Cortex-A57 according to the Optimization Guide.
gcc/
* config/aarch64/aarch64.c (cortexa53_tunings): Enable AES fusion.
(cortexa57_tunings): Likewise.
(cortexa72_tunings): Likewise.
(aarch_macro_fusion_pair_p): Add support for AES fusion.
* config/aarch64/aarch64-fusion-pairs.def: Add AES_AESMC entry.
* config/arm/aarch-common.c (aarch_crypto_can_dual_issue):
Allow virtual registers before reload so early scheduling works.
* config/arm/cortex-a57.md (cortex_a57_crypto_simple): Use
correct latency and pipeline.
(cortex_a57_crypto_complex): Likewise.
(cortex_a57_crypto_xor): Likewise.
(define_bypass): Add AES bypass.
From-SVN: r233268
Diffstat (limited to 'gcc/config/aarch64')
-rw-r--r-- | gcc/config/aarch64/aarch64-fusion-pairs.def | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 10 |
2 files changed, 8 insertions, 3 deletions
diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def index 8261da0..f488671 100644 --- a/gcc/config/aarch64/aarch64-fusion-pairs.def +++ b/gcc/config/aarch64/aarch64-fusion-pairs.def @@ -33,4 +33,5 @@ AARCH64_FUSION_PAIR ("adrp+add", ADRP_ADD) AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK) AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR) AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH) +AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 7ab5ec8..cb0892e 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -451,7 +451,7 @@ static const struct tune_params cortexa53_tunings = &generic_branch_cost, 4, /* memmov_cost */ 2, /* issue_rate */ - (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */ 8, /* function_align. */ 8, /* jump_align. */ @@ -476,7 +476,7 @@ static const struct tune_params cortexa57_tunings = &cortexa57_branch_cost, 4, /* memmov_cost */ 3, /* issue_rate */ - (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */ 16, /* function_align. */ 8, /* jump_align. */ @@ -502,7 +502,7 @@ static const struct tune_params cortexa72_tunings = &generic_branch_cost, 4, /* memmov_cost */ 3, /* issue_rate */ - (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */ 16, /* function_align. */ 8, /* jump_align. 
*/ @@ -13328,6 +13328,10 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) } } + if ((aarch64_tune_params.fusible_ops & AARCH64_FUSE_AES_AESMC) + && aarch_crypto_can_dual_issue (prev, curr)) + return true; + if ((aarch64_tune_params.fusible_ops & AARCH64_FUSE_CMP_BRANCH) && any_condjump_p (curr)) { |