author     Sylvia Taylor <sylvia.taylor@arm.com>    2019-07-09 12:51:55 +0000
committer  Kyrylo Tkachov <ktkachov@gcc.gnu.org>    2019-07-09 12:51:55 +0000
commit     5169fa77322e36dd4783bc5126185159c35a3584 (patch)
tree       0f91a01db5f71dd71180d423acde7e2f5164c77a /gcc
parent     b61184875ec454875b5a47f3bfa94a6d45f5b845 (diff)
[aarch64]: redefine aes patterns
This first patch removes the aarch64 usage of the aese/aesmc and aesd/aesimc
fusions (i.e. aes fusion) implemented in the scheduler, due to unpredictable
behaviour observed in cases such as:
- when register allocation goes bad (e.g. extra movs)
- aes operations with xor and zeroed keys among interleaved operations

A more stable version is instead provided by doing the aes fusion during the
combine pass. Since the aese and aesd patterns have been rewritten to
encapsulate the xor operation, the existing combine fusion patterns have also
been updated. This avoids the need for additional combine patterns for cases
like the ones below.

For AESE (the same applies to AESD, as both include a xor operation):

    data = data ^ key;
    data = vaeseq_u8(data, zero);
    ---
    eor   v1.16b, v0.16b, v1.16b
    aese  v1.16b, v2.16b

should mean, and generate, the same as:

    data = vaeseq_u8(data, key);
    ---
    aese  v1.16b, v0.16b

2019-07-09  Sylvia Taylor  <sylvia.taylor@arm.com>

	* config/aarch64/aarch64-simd.md
	(aarch64_crypto_aes<aes_op>v16qi): Redefine pattern with xor.
	(aarch64_crypto_aes<aesmc_op>v16qi): Remove attribute enabled.
	(*aarch64_crypto_aes<aes_op>v16qi_xor_combine): Remove both.
	(*aarch64_crypto_aese_fused, *aarch64_crypto_aesd_fused):
	Update to new definition.
	* config/aarch64/aarch64.c
	(aarch_macro_fusion_pair_p): Remove aese/aesmc fusion check.

	* gcc.target/aarch64/crypto-fuse-1.c: Remove.
	* gcc.target/aarch64/crypto-fuse-2.c: Remove.
	* gcc.target/aarch64/aes-fuse-1.c: New testcase.
	* gcc.target/aarch64/aes-fuse-2.c: New testcase.

From-SVN: r273304
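A minimal standalone sketch of the equivalence described above (the function
names are illustrative, not part of the patch). Built with the crypto
extensions enabled (e.g. -O3 -march=armv8-a+crypto), both functions are
expected to compile to a single aese after this change, since the xor is now
folded into the aese pattern:

    #include <arm_neon.h>

    static const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0,
                                    0, 0, 0, 0, 0, 0, 0, 0};

    /* xor first, then aese with a zeroed round key ...  */
    uint8x16_t aese_pre_xored (uint8x16_t data, uint8x16_t key)
    {
      data = data ^ key;             /* GCC vector extension, as in the tests  */
      return vaeseq_u8 (data, zero);
    }

    /* ... should generate the same single aese as the direct form.  */
    uint8x16_t aese_direct (uint8x16_t data, uint8x16_t key)
    {
      return vaeseq_u8 (data, key);
    }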
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog                                      | 11
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md                 | 67
-rw-r--r--  gcc/config/aarch64/aarch64.c                       |  4
-rw-r--r--  gcc/testsuite/ChangeLog                            |  7
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c (renamed from gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c) | 45
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c      | 65
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c   | 45
7 files changed, 135 insertions, 109 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 271a27a..25959b94 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2019-07-09 Sylvia Taylor <sylvia.taylor@arm.com>
+
+ * config/aarch64/aarch64-simd.md
+ (aarch64_crypto_aes<aes_op>v16qi): Redefine pattern with xor.
+ (aarch64_crypto_aes<aesmc_op>v16qi): Remove attribute enabled.
+ (*aarch64_crypto_aes<aes_op>v16qi_xor_combine): Remove both.
+ (*aarch64_crypto_aese_fused,
+ *aarch64_crypto_aesd_fused): Update to new definition.
+ * config/aarch64/aarch64.c
+ (aarch_macro_fusion_pair_p): Remove aese/aesmc fusion check.
+
2019-07-09 Richard Biener <rguenther@suse.de>
* gimple-match.h (gimple_match_op::resimplify): New.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 837242c..0c2600f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6053,56 +6053,23 @@
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
[(set (match_operand:V16QI 0 "register_operand" "=w")
- (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
- (match_operand:V16QI 2 "register_operand" "w")]
+ (unspec:V16QI
+ [(xor:V16QI
+ (match_operand:V16QI 1 "register_operand" "%0")
+ (match_operand:V16QI 2 "register_operand" "w"))]
CRYPTO_AES))]
"TARGET_SIMD && TARGET_AES"
"aes<aes_op>\\t%0.16b, %2.16b"
[(set_attr "type" "crypto_aese")]
)
-(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
- [(set (match_operand:V16QI 0 "register_operand" "=w")
- (unspec:V16QI [(xor:V16QI
- (match_operand:V16QI 1 "register_operand" "%0")
- (match_operand:V16QI 2 "register_operand" "w"))
- (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
- CRYPTO_AES))]
- "TARGET_SIMD && TARGET_AES"
- "aes<aes_op>\\t%0.16b, %2.16b"
- [(set_attr "type" "crypto_aese")]
-)
-
-(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
- [(set (match_operand:V16QI 0 "register_operand" "=w")
- (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
- (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
- (match_operand:V16QI 2 "register_operand" "w"))]
- CRYPTO_AES))]
- "TARGET_SIMD && TARGET_AES"
- "aes<aes_op>\\t%0.16b, %2.16b"
- [(set_attr "type" "crypto_aese")]
-)
-
-;; When AES/AESMC fusion is enabled we want the register allocation to
-;; look like:
-;; AESE Vn, _
-;; AESMC Vn, Vn
-;; So prefer to tie operand 1 to operand 0 when fusing.
-
(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
- [(set (match_operand:V16QI 0 "register_operand" "=w,w")
- (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
+ [(set (match_operand:V16QI 0 "register_operand" "=w")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
CRYPTO_AESMC))]
"TARGET_SIMD && TARGET_AES"
"aes<aesmc_op>\\t%0.16b, %1.16b"
- [(set_attr "type" "crypto_aesmc")
- (set_attr_alternative "enabled"
- [(if_then_else (match_test
- "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
- (const_string "yes" )
- (const_string "no"))
- (const_string "yes")])]
+ [(set_attr "type" "crypto_aesmc")]
)
;; When AESE/AESMC fusion is enabled we really want to keep the two together
@@ -6111,12 +6078,14 @@
;; Mash the two together during combine.
(define_insn "*aarch64_crypto_aese_fused"
- [(set (match_operand:V16QI 0 "register_operand" "=&w")
+ [(set (match_operand:V16QI 0 "register_operand" "=w")
(unspec:V16QI
[(unspec:V16QI
- [(match_operand:V16QI 1 "register_operand" "0")
- (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
- ] UNSPEC_AESMC))]
+ [(xor:V16QI
+ (match_operand:V16QI 1 "register_operand" "%0")
+ (match_operand:V16QI 2 "register_operand" "w"))]
+ UNSPEC_AESE)]
+ UNSPEC_AESMC))]
"TARGET_SIMD && TARGET_AES
&& aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
"aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
@@ -6130,12 +6099,14 @@
;; Mash the two together during combine.
(define_insn "*aarch64_crypto_aesd_fused"
- [(set (match_operand:V16QI 0 "register_operand" "=&w")
+ [(set (match_operand:V16QI 0 "register_operand" "=w")
(unspec:V16QI
[(unspec:V16QI
- [(match_operand:V16QI 1 "register_operand" "0")
- (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
- ] UNSPEC_AESIMC))]
+ [(xor:V16QI
+ (match_operand:V16QI 1 "register_operand" "%0")
+ (match_operand:V16QI 2 "register_operand" "w"))]
+ UNSPEC_AESD)]
+ UNSPEC_AESIMC))]
"TARGET_SIMD && TARGET_AES
&& aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
"aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a18fbd0..e4e9e3f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -17965,10 +17965,6 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
}
}
- if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
- && aarch_crypto_can_dual_issue (prev, curr))
- return true;
-
if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
&& any_condjump_p (curr))
{
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6f334ef..3e36c0a 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2019-07-09 Sylvia Taylor <sylvia.taylor@arm.com>
+
+ * gcc.target/aarch64/crypto-fuse-1.c: Remove.
+ * gcc.target/aarch64/crypto-fuse-2.c: Remove.
+ * gcc.target/aarch64/aes-fuse-1.c: New testcase.
+ * gcc.target/aarch64/aes-fuse-2.c: New testcase.
+
2019-07-09 Christophe Lyon <christophe.lyon@linaro.org>
* gcc.target/arm/cmse/bitfield-1.c: Fix address of .gnu.sgstubs
diff --git a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c b/gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c
index d8adc89..d7b4f89 100644
--- a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c
+++ b/gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c
@@ -1,45 +1,66 @@
/* { dg-do compile } */
/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
#include <arm_neon.h>
#define AESE(r, v, key) (r = vaeseq_u8 ((v), (key)));
#define AESMC(r, i) (r = vaesmcq_u8 (i))
+const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
uint8x16_t dummy;
uint8x16_t a;
uint8x16_t b;
uint8x16_t c;
uint8x16_t d;
-uint8x16_t e;
+uint8x16_t x;
+uint8x16_t y;
+uint8x16_t k;
+
+void foo (void)
-void
-foo (void)
{
- AESE (a, a, e);
+ AESE (a, a, k);
dummy = vaddq_u8 (dummy, dummy);
dummy = vaddq_u8 (dummy, dummy);
- AESE (b, b, e);
+ AESE (b, b, k);
dummy = vaddq_u8 (dummy, dummy);
dummy = vaddq_u8 (dummy, dummy);
- AESE (c, c, e);
+ AESE (c, c, k);
dummy = vaddq_u8 (dummy, dummy);
dummy = vaddq_u8 (dummy, dummy);
- AESE (d, d, e);
+ AESE (d, d, k);
dummy = vaddq_u8 (dummy, dummy);
dummy = vaddq_u8 (dummy, dummy);
- AESMC (a, a);
+ x = x ^ k;
+ AESE (x, x, zero);
dummy = vaddq_u8 (dummy, dummy);
dummy = vaddq_u8 (dummy, dummy);
- AESMC (b, b);
+ y = y ^ k;
+ AESE (y, y, zero);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+
+ AESMC (d, d);
dummy = vaddq_u8 (dummy, dummy);
dummy = vaddq_u8 (dummy, dummy);
AESMC (c, c);
dummy = vaddq_u8 (dummy, dummy);
dummy = vaddq_u8 (dummy, dummy);
- AESMC (d, d);
-}
+ AESMC (b, b);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESMC (a, a);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
-/* { dg-final { scan-assembler-times "crypto_aese_fused" 4 } } */
+ AESMC (y, y);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESMC (x, x);
+}
+/* { dg-final { scan-assembler-times "crypto_aese_fused" 6 } } */
+/* { dg-final { scan-assembler-not "veor" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c b/gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c
new file mode 100644
index 0000000..dfe01b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c
@@ -0,0 +1,65 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
+
+#include <arm_neon.h>
+
+#define AESD(r, v, key) (r = vaesdq_u8 ((v), (key)));
+#define AESIMC(r, i) (r = vaesimcq_u8 (i))
+
+const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+uint8x16_t dummy;
+uint8x16_t a;
+uint8x16_t b;
+uint8x16_t c;
+uint8x16_t d;
+uint8x16_t x;
+uint8x16_t y;
+uint8x16_t k;
+
+void foo (void)
+{
+ AESD (a, a, k);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESD (b, b, k);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESD (c, c, k);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESD (d, d, k);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+
+ x = x ^ k;
+ AESD (x, x, zero);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ y = y ^ k;
+ AESD (y, y, zero);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+
+ AESIMC (d, d);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESIMC (c, c);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESIMC (b, b);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESIMC (a, a);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+
+ AESIMC (y, y);
+ dummy = vaddq_u8 (dummy, dummy);
+ dummy = vaddq_u8 (dummy, dummy);
+ AESIMC (x, x);
+}
+
+/* { dg-final { scan-assembler-times "crypto_aesd_fused" 6 } } */
+/* { dg-final { scan-assembler-not "veor" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c b/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c
deleted file mode 100644
index b12df2d..0000000
--- a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */
-
-#include <arm_neon.h>
-
-#define AESE(r, v, key) (r = vaesdq_u8 ((v), (key)));
-#define AESMC(r, i) (r = vaesimcq_u8 (i))
-
-uint8x16_t dummy;
-uint8x16_t a;
-uint8x16_t b;
-uint8x16_t c;
-uint8x16_t d;
-uint8x16_t e;
-
-void
-foo (void)
-{
- AESE (a, a, e);
- dummy = vaddq_u8 (dummy, dummy);
- dummy = vaddq_u8 (dummy, dummy);
- AESE (b, b, e);
- dummy = vaddq_u8 (dummy, dummy);
- dummy = vaddq_u8 (dummy, dummy);
- AESE (c, c, e);
- dummy = vaddq_u8 (dummy, dummy);
- dummy = vaddq_u8 (dummy, dummy);
- AESE (d, d, e);
- dummy = vaddq_u8 (dummy, dummy);
- dummy = vaddq_u8 (dummy, dummy);
-
- AESMC (a, a);
- dummy = vaddq_u8 (dummy, dummy);
- dummy = vaddq_u8 (dummy, dummy);
- AESMC (b, b);
- dummy = vaddq_u8 (dummy, dummy);
- dummy = vaddq_u8 (dummy, dummy);
- AESMC (c, c);
- dummy = vaddq_u8 (dummy, dummy);
- dummy = vaddq_u8 (dummy, dummy);
- AESMC (d, d);
-}
-
-/* { dg-final { scan-assembler-times "crypto_aesd_fused" 4 } } */
-
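As a closing illustration, the instruction pair the fused patterns target is a
full AES round step (a hedged sketch; the helper below is hypothetical and not
part of the patch). With AARCH64_FUSE_AES_AESMC enabled, combine now merges
the two intrinsics into the *aarch64_crypto_aese_fused insn, which emits the
back-to-back "aese ; aesmc" sequence shown in the aarch64-simd.md hunk above:

    #include <arm_neon.h>

    /* One AES round: AddRoundKey + SubBytes/ShiftRows (aese), then
       MixColumns (aesmc).  With fusion enabled, these two intrinsics
       become a single fused insn emitting aese followed by aesmc on
       the same register.  */
    uint8x16_t aes_round (uint8x16_t state, uint8x16_t key)
    {
      return vaesmcq_u8 (vaeseq_u8 (state, key));
    }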