aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2014-03-28 17:24:52 +0000
committer Kyrylo Tkachov <ktkachov@gcc.gnu.org> 2014-03-28 17:24:52 +0000
commit ed6eb6dc9f67948da5f87b9c75d6d03bf696446e (patch)
tree 4449b0f8f099c92e987e7aa8b1311aaae0f48787 /gcc
parent b10baa9584d009f3725083c97c7d44f88749abe0 (diff)
download gcc-ed6eb6dc9f67948da5f87b9c75d6d03bf696446e.zip
gcc-ed6eb6dc9f67948da5f87b9c75d6d03bf696446e.tar.gz
gcc-ed6eb6dc9f67948da5f87b9c75d6d03bf696446e.tar.bz2
[ARM/AArch64][2/2] Crypto intrinsics tuning for Cortex-A53 - pipeline description
* config/arm/aarch-common.c (aarch_crypto_can_dual_issue): New.
* config/arm/aarch-common-protos.h (aarch_crypto_can_dual_issue): Declare extern.
* config/arm/cortex-a53.md: Add reservations and bypass for crypto instructions as well as AdvancedSIMD loads.

From-SVN: r208910
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 8
-rw-r--r-- gcc/config/arm/aarch-common-protos.h 1
-rw-r--r-- gcc/config/arm/aarch-common.c 36
-rw-r--r-- gcc/config/arm/cortex-a53.md 56
4 files changed, 101 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c023b90..8434f04 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,13 @@
2014-03-28 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+ * config/arm/aarch-common.c (aarch_crypto_can_dual_issue): New.
+ * config/arm/aarch-common-protos.h (aarch_crypto_can_dual_issue):
+ Declare extern.
+ * config/arm/cortex-a53.md: Add reservations and bypass for crypto
+ instructions as well as AdvancedSIMD loads.
+
+2014-03-28 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
* config/aarch64/aarch64-simd.md (aarch64_crypto_aes<aes_op>v16qi):
Use crypto_aese type.
(aarch64_crypto_aes<aesmc_op>v16qi): Use crypto_aesmc type.
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index a5ff6b4..3e6e242 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -23,6 +23,7 @@
#ifndef GCC_AARCH_COMMON_PROTOS_H
#define GCC_AARCH_COMMON_PROTOS_H
+extern int aarch_crypto_can_dual_issue (rtx, rtx);
extern int arm_early_load_addr_dep (rtx, rtx);
extern int arm_early_store_addr_dep (rtx, rtx);
extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
index c11f7e9..af8fc99 100644
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -31,6 +31,42 @@
#include "c-family/c-common.h"
#include "rtl.h"
+/* In ARMv8-A there's a general expectation that AESE/AESMC and
+ AESD/AESIMC sequences of the form:
+
+ AESE Vn, _
+ AESMC Vn, Vn
+
+ will dual-issue on super-scalar implementations. Return nonzero if
+ PRODUCER and CONSUMER form such a pair (same Vn throughout), else 0. */
+
+int
+aarch_crypto_can_dual_issue (rtx producer, rtx consumer)
+{
+ rtx producer_src, consumer_src;
+
+ producer = single_set (producer);
+ consumer = single_set (consumer);
+ /* A NULL single_set result yields a NULL source, rejected below. */
+ producer_src = producer ? SET_SRC (producer) : NULL;
+ consumer_src = consumer ? SET_SRC (consumer) : NULL;
+ /* Both sources must be UNSPECs pairing AESE->AESMC or AESD->AESIMC. */
+ if (producer_src && consumer_src
+ && GET_CODE (producer_src) == UNSPEC && GET_CODE (consumer_src) == UNSPEC
+ && ((XINT (producer_src, 1) == UNSPEC_AESE
+ && XINT (consumer_src, 1) == UNSPEC_AESMC)
+ || (XINT (producer_src, 1) == UNSPEC_AESD
+ && XINT (consumer_src, 1) == UNSPEC_AESIMC)))
+ {
+ unsigned int regno = REGNO (SET_DEST (producer));
+ /* The consumer must both overwrite and read the producer's register. */
+ return REGNO (SET_DEST (consumer)) == regno
+ && REGNO (XVECEXP (consumer_src, 0, 0)) == regno;
+ }
+
+ return 0;
+}
+
typedef struct
{
rtx_code search_code;
diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md
index deae8eb..b131c81 100644
--- a/gcc/config/arm/cortex-a53.md
+++ b/gcc/config/arm/cortex-a53.md
@@ -61,6 +61,11 @@
(define_cpu_unit "cortex_a53_fp_div_sqrt" "cortex_a53")
+;; The Advanced SIMD pipelines, modelled as two units of the cortex_a53 automaton.
+
+(define_cpu_unit "cortex_a53_simd0" "cortex_a53")
+(define_cpu_unit "cortex_a53_simd1" "cortex_a53")
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instructions.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -248,6 +253,39 @@
"cortex_a53_slot0, cortex_a53_fp_div_sqrt * 28")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ARMv8-A Cryptographic extensions.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "cortex_a53_crypto_aese" 2
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "type" "crypto_aese"))
+ "cortex_a53_simd0")
+;; Unlike the other crypto reservations here, this may use either SIMD unit.
+(define_insn_reservation "cortex_a53_crypto_aesmc" 2
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "type" "crypto_aesmc"))
+ "cortex_a53_simd0 | cortex_a53_simd1")
+;; Despite its name, this reservation also covers the crypto_sha256_fast type.
+(define_insn_reservation "cortex_a53_crypto_sha1_fast" 2
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "type" "crypto_sha1_fast, crypto_sha256_fast"))
+ "cortex_a53_simd0")
+
+(define_insn_reservation "cortex_a53_crypto_sha1_xor" 3
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "type" "crypto_sha1_xor"))
+ "cortex_a53_simd0")
+
+(define_insn_reservation "cortex_a53_crypto_sha_slow" 5
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow"))
+ "cortex_a53_simd0")
+;; Zero latency aese -> aesmc when the guard function accepts the pair.
+(define_bypass 0 "cortex_a53_crypto_aese"
+ "cortex_a53_crypto_aesmc"
+ "aarch_crypto_can_dual_issue")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VFP to/from core transfers.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -284,6 +322,16 @@
(eq_attr "type" "f_loadd"))
"cortex_a53_slot0")
+(define_insn_reservation "cortex_a53_f_load_2reg" 5
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "type" "neon_load2_2reg_q"))
+ "(cortex_a53_slot_any+cortex_a53_ls)*2") ;; both reserved together, 2 cycles
+;; The neon_load1_1reg_q type reserves the same pair for a single cycle.
+(define_insn_reservation "cortex_a53_f_loadq" 5
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "type" "neon_load1_1reg_q"))
+ "cortex_a53_slot_any+cortex_a53_ls")
+
(define_insn_reservation "cortex_a53_f_stores" 0
(and (eq_attr "tune" "cortexa53")
(eq_attr "type" "f_stores"))
@@ -307,3 +355,11 @@
cortex_a53_fdivs, cortex_a53_fdivd,\
cortex_a53_f2r")
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Crude Advanced SIMD approximation.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; NOTE(review): name lacks the "a" of the cortex_a53_ prefix used elsewhere.
+(define_insn_reservation "cortex_53_advsimd" 4
+ (and (eq_attr "tune" "cortexa53")
+ (eq_attr "is_neon_type" "yes"))
+ "cortex_a53_simd0")