author     Michael Collison <michael.collison@arm.com>  2018-01-11 06:04:17 +0000
committer  Michael Collison <collison@gcc.gnu.org>      2018-01-11 06:04:17 +0000
commit     27086ea39f16addb987edb4719afb7cd9b01800c (patch)
tree       031e83cfb17468b6933e2908d6439d510e6ac6ad
parent     1bfd6a0038c9ade9db7cbae3b550d14a62c91ebf (diff)
download   gcc-27086ea39f16addb987edb4719afb7cd9b01800c.zip
           gcc-27086ea39f16addb987edb4719afb7cd9b01800c.tar.gz
           gcc-27086ea39f16addb987edb4719afb7cd9b01800c.tar.bz2
aarch64-modes.def (V2HF): New VECTOR_MODE.
-rw-r--r--  gcc/ChangeLog | 234
-rw-r--r--  gcc/config/aarch64/aarch64-arches.def | 1
-rw-r--r--  gcc/config/aarch64/aarch64-builtins.c | 12
-rw-r--r--  gcc/config/aarch64/aarch64-c.c | 8
-rw-r--r--  gcc/config/aarch64/aarch64-modes.def | 1
-rw-r--r--  gcc/config/aarch64/aarch64-option-extensions.def | 44
-rw-r--r--  gcc/config/aarch64/aarch64-simd-builtins.def | 66
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md | 761
-rw-r--r--  gcc/config/aarch64/aarch64.h | 31
-rw-r--r--  gcc/config/aarch64/arm_neon.h | 316
-rw-r--r--  gcc/config/aarch64/constraints.md | 12
-rw-r--r--  gcc/config/aarch64/iterators.md | 49
-rw-r--r--  gcc/config/aarch64/predicates.md | 12
-rw-r--r--  gcc/config/arm/types.md | 4
-rw-r--r--  gcc/doc/invoke.texi | 26
-rw-r--r--  gcc/testsuite/ChangeLog | 34
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h | 25
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_high_1.c | 9
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_high_2.c | 9
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_high_3.c | 9
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h | 49
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high_1.c | 14
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high_2.c | 13
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high_3.c | 13
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h | 49
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low_1.c | 13
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low_2.c | 13
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low_3.c | 13
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h | 25
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_low_1.c | 9
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_low_2.c | 9
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/fp16_fmul_low_3.c | 9
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sha2.h | 25
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sha2_1.c | 9
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sha2_2.c | 9
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sha2_3.c | 9
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sha3.h | 25
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sha3_1.c | 10
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sha3_2.c | 9
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sha3_3.c | 9
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sm3_sm4.c | 78
41 files changed, 2050 insertions(+), 25 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f69ddc6..7eb6c1c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,237 @@
+2018-01-10 Michael Collison <michael.collison@arm.com>
+
+ * config/aarch64/aarch64-modes.def (V2HF): New VECTOR_MODE.
+ * config/aarch64/aarch64-option-extensions.def: Add
+ AARCH64_OPT_EXTENSION of 'fp16fml'.
+ * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins):
+ (__ARM_FEATURE_FP16_FML): Define if TARGET_F16FML is true.
+ * config/aarch64/predicates.md (aarch64_lane_imm3): New predicate.
+ * config/aarch64/constraints.md (Ui7): New constraint.
+ * config/aarch64/iterators.md (VFMLA_W): New mode iterator.
+ (VFMLA_SEL_W): Ditto.
+ (f16quad): Ditto.
+ (f16mac1): Ditto.
+ (VFMLA16_LOW): New int iterator.
+ (VFMLA16_HIGH): Ditto.
+ (UNSPEC_FMLAL): New unspec.
+ (UNSPEC_FMLSL): Ditto.
+ (UNSPEC_FMLAL2): Ditto.
+ (UNSPEC_FMLSL2): Ditto.
+ (f16mac): New code attribute.
+ * config/aarch64/aarch64-simd-builtins.def
+ (aarch64_fmlal_lowv2sf): New builtin.
+ (aarch64_fmlsl_lowv2sf): Ditto.
+ (aarch64_fmlalq_lowv4sf): Ditto.
+ (aarch64_fmlslq_lowv4sf): Ditto.
+ (aarch64_fmlal_highv2sf): Ditto.
+ (aarch64_fmlsl_highv2sf): Ditto.
+ (aarch64_fmlalq_highv4sf): Ditto.
+ (aarch64_fmlslq_highv4sf): Ditto.
+ (aarch64_fmlal_lane_lowv2sf): Ditto.
+ (aarch64_fmlsl_lane_lowv2sf): Ditto.
+ (aarch64_fmlal_laneq_lowv2sf): Ditto.
+ (aarch64_fmlsl_laneq_lowv2sf): Ditto.
+ (aarch64_fmlalq_lane_lowv4sf): Ditto.
+ (aarch64_fmlsl_lane_lowv4sf): Ditto.
+ (aarch64_fmlalq_laneq_lowv4sf): Ditto.
+ (aarch64_fmlsl_laneq_lowv4sf): Ditto.
+ (aarch64_fmlal_lane_highv2sf): Ditto.
+ (aarch64_fmlsl_lane_highv2sf): Ditto.
+ (aarch64_fmlal_laneq_highv2sf): Ditto.
+ (aarch64_fmlsl_laneq_highv2sf): Ditto.
+ (aarch64_fmlalq_lane_highv4sf): Ditto.
+ (aarch64_fmlsl_lane_highv4sf): Ditto.
+ (aarch64_fmlalq_laneq_highv4sf): Ditto.
+ (aarch64_fmlsl_laneq_highv4sf): Ditto.
+ * config/aarch64/aarch64-simd.md:
+ (aarch64_fml<f16mac1>l<f16quad>_low<mode>): New pattern.
+ (aarch64_fml<f16mac1>l<f16quad>_high<mode>): Ditto.
+ (aarch64_simd_fml<f16mac1>l<f16quad>_low<mode>): Ditto.
+ (aarch64_simd_fml<f16mac1>l<f16quad>_high<mode>): Ditto.
+ (aarch64_fml<f16mac1>l_lane_lowv2sf): Ditto.
+ (aarch64_fml<f16mac1>l_lane_highv2sf): Ditto.
+ (aarch64_simd_fml<f16mac>l_lane_lowv2sf): Ditto.
+ (aarch64_simd_fml<f16mac>l_lane_highv2sf): Ditto.
+ (aarch64_fml<f16mac1>lq_laneq_lowv4sf): Ditto.
+ (aarch64_fml<f16mac1>lq_laneq_highv4sf): Ditto.
+ (aarch64_simd_fml<f16mac>lq_laneq_lowv4sf): Ditto.
+ (aarch64_simd_fml<f16mac>lq_laneq_highv4sf): Ditto.
+ (aarch64_fml<f16mac1>l_laneq_lowv2sf): Ditto.
+ (aarch64_fml<f16mac1>l_laneq_highv2sf): Ditto.
+ (aarch64_simd_fml<f16mac>l_laneq_lowv2sf): Ditto.
+ (aarch64_simd_fml<f16mac>l_laneq_highv2sf): Ditto.
+ (aarch64_fml<f16mac1>lq_lane_lowv4sf): Ditto.
+ (aarch64_fml<f16mac1>lq_lane_highv4sf): Ditto.
+ (aarch64_simd_fml<f16mac>lq_lane_lowv4sf): Ditto.
+ (aarch64_simd_fml<f16mac>lq_lane_highv4sf): Ditto.
+ * config/aarch64/arm_neon.h (vfmlal_low_u32): New intrinsic.
+ (vfmlsl_low_u32): Ditto.
+ (vfmlalq_low_u32): Ditto.
+ (vfmlslq_low_u32): Ditto.
+ (vfmlal_high_u32): Ditto.
+ (vfmlsl_high_u32): Ditto.
+ (vfmlalq_high_u32): Ditto.
+ (vfmlslq_high_u32): Ditto.
+ (vfmlal_lane_low_u32): Ditto.
+ (vfmlsl_lane_low_u32): Ditto.
+ (vfmlal_laneq_low_u32): Ditto.
+ (vfmlsl_laneq_low_u32): Ditto.
+ (vfmlalq_lane_low_u32): Ditto.
+ (vfmlslq_lane_low_u32): Ditto.
+ (vfmlalq_laneq_low_u32): Ditto.
+ (vfmlslq_laneq_low_u32): Ditto.
+ (vfmlal_lane_high_u32): Ditto.
+ (vfmlsl_lane_high_u32): Ditto.
+ (vfmlal_laneq_high_u32): Ditto.
+ (vfmlsl_laneq_high_u32): Ditto.
+ (vfmlalq_lane_high_u32): Ditto.
+ (vfmlslq_lane_high_u32): Ditto.
+ (vfmlalq_laneq_high_u32): Ditto.
+ (vfmlslq_laneq_high_u32): Ditto.
+ * config/aarch64/aarch64.h (AARCH64_FL_F16FML): New flag.
+ (AARCH64_FL_FOR_ARCH8_4): New.
+ (AARCH64_ISA_F16FML): New ISA flag.
+ (TARGET_F16FML): New feature flag for fp16fml.
+ * doc/invoke.texi: Document the new fp16fml option.
+
+2018-01-10 Michael Collison <michael.collison@arm.com>
+
+ * config/aarch64/aarch64-builtins.c:
+ (aarch64_types_ternopu_imm_qualifiers, TYPES_TERNOPUI): New.
+ * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins):
+ (__ARM_FEATURE_SHA3): Define if TARGET_SHA3 is true.
+ * config/aarch64/aarch64.h (AARCH64_FL_SHA3): New flags.
+ (AARCH64_ISA_SHA3): New ISA flag.
+ (TARGET_SHA3): New feature flag for sha3.
+ * config/aarch64/iterators.md (sha512_op): New int attribute.
+ (CRYPTO_SHA512): New int iterator.
+ (UNSPEC_SHA512H): New unspec.
+ (UNSPEC_SHA512H2): Ditto.
+ (UNSPEC_SHA512SU0): Ditto.
+ (UNSPEC_SHA512SU1): Ditto.
+ * config/aarch64/aarch64-simd-builtins.def
+ (aarch64_crypto_sha512hqv2di): New builtin.
+ (aarch64_crypto_sha512h2qv2di): Ditto.
+ (aarch64_crypto_sha512su0qv2di): Ditto.
+ (aarch64_crypto_sha512su1qv2di): Ditto.
+ (aarch64_eor3qv8hi): Ditto.
+ (aarch64_rax1qv2di): Ditto.
+ (aarch64_xarqv2di): Ditto.
+ (aarch64_bcaxqv8hi): Ditto.
+ * config/aarch64/aarch64-simd.md:
+ (aarch64_crypto_sha512h<sha512_op>qv2di): New pattern.
+ (aarch64_crypto_sha512su0qv2di): Ditto.
+ (aarch64_crypto_sha512su1qv2di): Ditto.
+ (aarch64_eor3qv8hi): Ditto.
+ (aarch64_rax1qv2di): Ditto.
+ (aarch64_xarqv2di): Ditto.
+ (aarch64_bcaxqv8hi): Ditto.
+ * config/aarch64/arm_neon.h (vsha512hq_u64): New intrinsic.
+ (vsha512h2q_u64): Ditto.
+ (vsha512su0q_u64): Ditto.
+ (vsha512su1q_u64): Ditto.
+ (veor3q_u16): Ditto.
+ (vrax1q_u64): Ditto.
+ (vxarq_u64): Ditto.
+ (vbcaxq_u16): Ditto.
+ * config/arm/types.md (crypto_sha512): New type attribute.
+ (crypto_sha3): Ditto.
+ * doc/invoke.texi: Document the new sha3 option.
+
+2018-01-10 Michael Collison <michael.collison@arm.com>
+
+ * config/aarch64/aarch64-builtins.c:
+ (aarch64_types_quadopu_imm_qualifiers, TYPES_QUADOPUI): New.
+ * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins):
+ (__ARM_FEATURE_SM3): Define if TARGET_SM4 is true.
+ (__ARM_FEATURE_SM4): Define if TARGET_SM4 is true.
+ * config/aarch64/aarch64.h (AARCH64_FL_SM4): New flags.
+ (AARCH64_ISA_SM4): New ISA flag.
+ (TARGET_SM4): New feature flag for sm4.
+ * config/aarch64/aarch64-simd-builtins.def
+ (aarch64_sm3ss1qv4si): New builtin.
+ (aarch64_sm3tt1aq4si): Ditto.
+ (aarch64_sm3tt1bq4si): Ditto.
+ (aarch64_sm3tt2aq4si): Ditto.
+ (aarch64_sm3tt2bq4si): Ditto.
+ (aarch64_sm3partw1qv4si): Ditto.
+ (aarch64_sm3partw2qv4si): Ditto.
+ (aarch64_sm4eqv4si): Ditto.
+ (aarch64_sm4ekeyqv4si): Ditto.
+ * config/aarch64/aarch64-simd.md:
+ (aarch64_sm3ss1qv4si): New pattern.
+ (aarch64_sm3tt<sm3tt_op>qv4si): Ditto.
+ (aarch64_sm3partw<sm3part_op>qv4si): Ditto.
+ (aarch64_sm4eqv4si): Ditto.
+ (aarch64_sm4ekeyqv4si): Ditto.
+ * config/aarch64/iterators.md (sm3tt_op): New int iterator.
+ (sm3part_op): Ditto.
+ (CRYPTO_SM3TT): Ditto.
+ (CRYPTO_SM3PART): Ditto.
+ (UNSPEC_SM3SS1): New unspec.
+ (UNSPEC_SM3TT1A): Ditto.
+ (UNSPEC_SM3TT1B): Ditto.
+ (UNSPEC_SM3TT2A): Ditto.
+ (UNSPEC_SM3TT2B): Ditto.
+ (UNSPEC_SM3PARTW1): Ditto.
+ (UNSPEC_SM3PARTW2): Ditto.
+ (UNSPEC_SM4E): Ditto.
+ (UNSPEC_SM4EKEY): Ditto.
+ * config/aarch64/constraints.md (Ui2): New constraint.
+ * config/aarch64/predicates.md (aarch64_imm2): New predicate.
+ * config/arm/types.md (crypto_sm3): New type attribute.
+ (crypto_sm4): Ditto.
+ * config/aarch64/arm_neon.h (vsm3ss1q_u32): New intrinsic.
+ (vsm3tt1aq_u32): Ditto.
+ (vsm3tt1bq_u32): Ditto.
+ (vsm3tt2aq_u32): Ditto.
+ (vsm3tt2bq_u32): Ditto.
+ (vsm3partw1q_u32): Ditto.
+ (vsm3partw2q_u32): Ditto.
+ (vsm4eq_u32): Ditto.
+ (vsm4ekeyq_u32): Ditto.
+ * doc/invoke.texi: Document the new sm4 option.
+
+2018-01-10 Michael Collison <michael.collison@arm.com>
+
+ * config/aarch64/aarch64-arches.def (armv8.4-a): New architecture.
+ * config/aarch64/aarch64.h (AARCH64_ISA_V8_4): New ISA flag.
+ (AARCH64_FL_FOR_ARCH8_4): New.
+ (AARCH64_FL_V8_4): New flag.
+ * doc/invoke.texi: Document the new armv8.4-a option.
+
+2018-01-10 Michael Collison <michael.collison@arm.com>
+
+ * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins):
+ (__ARM_FEATURE_AES): Define if TARGET_AES is true.
+ (__ARM_FEATURE_SHA2): Define if TARGET_SHA2 is true.
+ * config/aarch64/aarch64-option-extensions.def: Add
+ AARCH64_OPT_EXTENSION of 'sha2'.
+ (aes): Add AARCH64_OPT_EXTENSION of 'aes'.
+ (crypto): Disable sha2 and aes if crypto disabled.
+ (crypto): Enable aes and sha2 if enabled.
+ (simd): Disable sha2 and aes if simd disabled.
+ * config/aarch64/aarch64.h (AARCH64_FL_AES, AARCH64_FL_SHA2):
+ New flags.
+ (AARCH64_ISA_AES, AARCH64_ISA_SHA2): New ISA flags.
+ (TARGET_SHA2): New feature flag for sha2.
+ (TARGET_AES): New feature flag for aes.
+ * config/aarch64/aarch64-simd.md:
+ (aarch64_crypto_aes<aes_op>v16qi): Make pattern
+ conditional on TARGET_AES.
+ (aarch64_crypto_aes<aesmc_op>v16qi): Ditto.
+ (aarch64_crypto_sha1hsi): Make pattern conditional
+ on TARGET_SHA2.
+ (aarch64_crypto_sha1hv4si): Ditto.
+ (aarch64_be_crypto_sha1hv4si): Ditto.
+ (aarch64_crypto_sha1su1v4si): Ditto.
+ (aarch64_crypto_sha1<sha1_op>v4si): Ditto.
+ (aarch64_crypto_sha1su0v4si): Ditto.
+ (aarch64_crypto_sha256h<sha256_op>v4si): Ditto.
+ (aarch64_crypto_sha256su0v4si): Ditto.
+ (aarch64_crypto_sha256su1v4si): Ditto.
+ * doc/invoke.texi: Document the new aes and sha2 options.
+
2018-01-10 Martin Sebor <msebor@redhat.com>
PR tree-optimization/83781
diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
index 0020c22..a37a555 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -34,5 +34,6 @@ AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_ARCH8)
AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_ARCH8_1)
AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_ARCH8_2)
AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_ARCH8_3)
+AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_ARCH8_4)
#undef AARCH64_ARCH
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 801ad52..9775d3a 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -164,6 +164,12 @@ aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_unsigned,
qualifier_unsigned, qualifier_unsigned };
#define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_unsigned, qualifier_unsigned,
+ qualifier_unsigned, qualifier_immediate };
+#define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
+
static enum aarch64_type_qualifiers
aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
@@ -177,6 +183,12 @@ aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)
static enum aarch64_type_qualifiers
+aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
+ qualifier_unsigned, qualifier_immediate };
+#define TYPES_QUADOPUI (aarch64_types_quadopu_imm_qualifiers)
+
+static enum aarch64_type_qualifiers
aarch64_types_binop_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_poly, qualifier_none, qualifier_immediate };
#define TYPES_GETREGP (aarch64_types_binop_imm_p_qualifiers)
diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index c1892ed..172c30f 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -137,6 +137,14 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
aarch64_def_or_undef (TARGET_CRYPTO, "__ARM_FEATURE_CRYPTO", pfile);
aarch64_def_or_undef (TARGET_SIMD_RDMA, "__ARM_FEATURE_QRDMX", pfile);
+ aarch64_def_or_undef (TARGET_AES, "__ARM_FEATURE_AES", pfile);
+ aarch64_def_or_undef (TARGET_SHA2, "__ARM_FEATURE_SHA2", pfile);
+ aarch64_def_or_undef (TARGET_SHA3, "__ARM_FEATURE_SHA3", pfile);
+ aarch64_def_or_undef (TARGET_SHA3, "__ARM_FEATURE_SHA512", pfile);
+ aarch64_def_or_undef (TARGET_SM4, "__ARM_FEATURE_SM3", pfile);
+ aarch64_def_or_undef (TARGET_SM4, "__ARM_FEATURE_SM4", pfile);
+ aarch64_def_or_undef (TARGET_F16FML, "__ARM_FEATURE_FP16_FML", pfile);
+
/* Not for ACLE, but required to keep "float.h" correct if we switch
target between implementations that do or do not support ARMv8.2-A
16-bit floating-point extensions. */
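
Each aarch64_def_or_undef call above surfaces as an ACLE feature-test macro. A minimal sketch of guarding code on those macros (the file name and messages are illustrative; compile with e.g. -march=armv8.2-a+sha3+sm4):

  /* check-macros.c: report which of the new ACLE macros are defined.
     All macro names come from the hunk above.  */
  #include <stdio.h>

  int
  main (void)
  {
  #ifdef __ARM_FEATURE_SHA512
    puts ("SHA512 instructions available");
  #endif
  #ifdef __ARM_FEATURE_SM4
    puts ("SM3/SM4 instructions available");
  #endif
  #ifdef __ARM_FEATURE_FP16_FML
    puts ("FP16 FMLAL/FMLSL instructions available");
  #endif
    return 0;
  }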
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 22952dd..3ed1118 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -35,6 +35,7 @@ VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */
VECTOR_MODES (FLOAT, 8); /* V2SF. */
VECTOR_MODES (FLOAT, 16); /* V4SF V2DF. */
VECTOR_MODE (FLOAT, DF, 1); /* V1DF. */
+VECTOR_MODE (FLOAT, HF, 2); /* V2HF. */
/* Oct Int: 256-bit integer mode needed for 32-byte vector arguments. */
INT_MODE (OI, 32);
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 4003fb9..593dad9 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -39,16 +39,22 @@
that are required. Their order is not important. */
/* Enabling "fp" just enables "fp".
- Disabling "fp" also disables "simd", "crypto" and "fp16". */
-AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, 0, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO | AARCH64_FL_F16, "fp")
+ Disabling "fp" also disables "simd", "crypto", "fp16", "aes", "sha2", "sha3", and sm3/sm4. */
+AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, 0, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO |\
+ AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2 |\
+ AARCH64_FL_SHA3 | AARCH64_FL_SM4, "fp")
/* Enabling "simd" also enables "fp".
- Disabling "simd" also disables "crypto" and "dotprod". */
-AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, AARCH64_FL_CRYPTO | AARCH64_FL_DOTPROD, "asimd")
+ Disabling "simd" also disables "crypto", "dotprod", "aes", "sha2", "sha3" and "sm3/sm4". */
+AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, AARCH64_FL_CRYPTO |\
+ AARCH64_FL_DOTPROD | AARCH64_FL_AES | AARCH64_FL_SHA2 |\
+ AARCH64_FL_SHA3 | AARCH64_FL_SM4, "asimd")
-/* Enabling "crypto" also enables "fp", "simd".
- Disabling "crypto" just disables "crypto". */
-AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | AARCH64_FL_SIMD, 0, "aes pmull sha1 sha2")
+/* Enabling "crypto" also enables "fp" and "simd".
+ Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4". */
+AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | AARCH64_FL_SIMD,\
+ AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | AARCH64_FL_SM4,\
+ "aes pmull sha1 sha2")
/* Enabling or disabling "crc" only changes "crc". */
AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, "crc32")
@@ -57,8 +63,8 @@ AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, "crc32")
AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, 0, 0, "atomics")
/* Enabling "fp16" also enables "fp".
- Disabling "fp16" just disables "fp16". */
-AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, 0, "fphp asimdhp")
+ Disabling "fp16" disables "fp16" and "fp16fml". */
+AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, AARCH64_FL_F16FML, "fphp asimdhp")
/* Enabling or disabling "rcpc" only changes "rcpc". */
AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, "lrcpc")
@@ -71,4 +77,24 @@ AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, AARCH64_FL_FP | AARCH64_FL_SIMD,
Disabling "dotprod" only disables "dotprod". */
AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_SIMD, 0, "asimddp")
+/* Enabling "aes" also enables "simd".
+ Disabling "aes" just disables "aes". */
+AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_SIMD, 0, "aes")
+
+/* Enabling "sha2" also enables "simd".
+ Disabling "sha2" just disables "sha2". */
+AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_SIMD, 0, "sha1 sha2")
+
+/* Enabling "sha3" enables "simd" and "sha2".
+ Disabling "sha3" just disables "sha3". */
+AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_SIMD | AARCH64_FL_SHA2, 0, "sha3 sha512")
+
+/* Enabling "sm4" also enables "simd".
+ Disabling "sm4" just disables "sm4". */
+AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_SIMD, 0, "sm3 sm4")
+
+/* Enabling "fp16fml" also enables "fp" and "fp16".
+ Disabling "fp16fml" just disables "fp16fml". */
+AARCH64_OPT_EXTENSION("fp16fml", AARCH64_FL_F16FML, AARCH64_FL_FP | AARCH64_FL_F16, 0, "asimdfml")
+
#undef AARCH64_OPT_EXTENSION
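
The on/off masks above encode the dependencies spelled out in the comments. A sketch of how they surface on the command line, written as preprocessor checks (the file name and -march strings are illustrative):

  /* deps.c -- exercise the new extension dependency rules, e.g.:
       gcc -c -march=armv8.2-a+sha3 deps.c      (sha3 pulls in sha2 and simd)
       gcc -c -march=armv8.2-a+sm4 deps.c       (sm4 covers both SM3 and SM4)
       gcc -c -march=armv8.2-a+fp16fml deps.c   (fp16fml pulls in fp16 and fp)  */
  #if defined (__ARM_FEATURE_SHA3) && !defined (__ARM_FEATURE_SHA2)
  #error "enabling sha3 should also have enabled sha2"
  #endif
  #if defined (__ARM_FEATURE_SM4) && !defined (__ARM_FEATURE_SM3)
  #error "the sm4 extension defines both the SM3 and SM4 macros"
  #endif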
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 38392d1..b383f24 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -576,3 +576,69 @@
BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
+
+ /* Implemented by aarch64_sm3ss1qv4si. */
+ VAR1 (TERNOPU, sm3ss1q, 0, v4si)
+ /* Implemented by aarch64_sm3tt<sm3tt_op>qv4si. */
+ VAR1 (QUADOPUI, sm3tt1aq, 0, v4si)
+ VAR1 (QUADOPUI, sm3tt1bq, 0, v4si)
+ VAR1 (QUADOPUI, sm3tt2aq, 0, v4si)
+ VAR1 (QUADOPUI, sm3tt2bq, 0, v4si)
+ /* Implemented by aarch64_sm3partw<sm3part_op>qv4si. */
+ VAR1 (TERNOPU, sm3partw1q, 0, v4si)
+ VAR1 (TERNOPU, sm3partw2q, 0, v4si)
+ /* Implemented by aarch64_sm4eqv4si. */
+ VAR1 (BINOPU, sm4eq, 0, v4si)
+ /* Implemented by aarch64_sm4ekeyqv4si. */
+ VAR1 (BINOPU, sm4ekeyq, 0, v4si)
+ /* Implemented by aarch64_crypto_sha512hqv2di. */
+ VAR1 (TERNOPU, crypto_sha512hq, 0, v2di)
+ /* Implemented by aarch64_crypto_sha512h2qv2di. */
+ VAR1 (TERNOPU, crypto_sha512h2q, 0, v2di)
+ /* Implemented by aarch64_crypto_sha512su0qv2di. */
+ VAR1 (BINOPU, crypto_sha512su0q, 0, v2di)
+ /* Implemented by aarch64_crypto_sha512su1qv2di. */
+ VAR1 (TERNOPU, crypto_sha512su1q, 0, v2di)
+ /* Implemented by aarch64_eor3qv8hi. */
+ VAR1 (TERNOPU, eor3q, 0, v8hi)
+ /* Implemented by aarch64_rax1qv2di. */
+ VAR1 (BINOPU, rax1q, 0, v2di)
+ /* Implemented by aarch64_xarqv2di. */
+ VAR1 (TERNOPUI, xarq, 0, v2di)
+ /* Implemented by aarch64_bcaxqv8hi. */
+ VAR1 (TERNOPU, bcaxq, 0, v8hi)
+
+ /* Implemented by aarch64_fml<f16mac1>l<f16quad>_low<mode>. */
+ VAR1 (TERNOP, fmlal_low, 0, v2sf)
+ VAR1 (TERNOP, fmlsl_low, 0, v2sf)
+ VAR1 (TERNOP, fmlalq_low, 0, v4sf)
+ VAR1 (TERNOP, fmlslq_low, 0, v4sf)
+ /* Implemented by aarch64_fml<f16mac1>l<f16quad>_high<mode>. */
+ VAR1 (TERNOP, fmlal_high, 0, v2sf)
+ VAR1 (TERNOP, fmlsl_high, 0, v2sf)
+ VAR1 (TERNOP, fmlalq_high, 0, v4sf)
+ VAR1 (TERNOP, fmlslq_high, 0, v4sf)
+ /* Implemented by aarch64_fml<f16mac1>l_lane_lowv2sf. */
+ VAR1 (QUADOP_LANE, fmlal_lane_low, 0, v2sf)
+ VAR1 (QUADOP_LANE, fmlsl_lane_low, 0, v2sf)
+ /* Implemented by aarch64_fml<f16mac1>l_laneq_lowv2sf. */
+ VAR1 (QUADOP_LANE, fmlal_laneq_low, 0, v2sf)
+ VAR1 (QUADOP_LANE, fmlsl_laneq_low, 0, v2sf)
+ /* Implemented by aarch64_fml<f16mac1>lq_lane_lowv4sf. */
+ VAR1 (QUADOP_LANE, fmlalq_lane_low, 0, v4sf)
+ VAR1 (QUADOP_LANE, fmlslq_lane_low, 0, v4sf)
+ /* Implemented by aarch64_fml<f16mac1>lq_laneq_lowv4sf. */
+ VAR1 (QUADOP_LANE, fmlalq_laneq_low, 0, v4sf)
+ VAR1 (QUADOP_LANE, fmlslq_laneq_low, 0, v4sf)
+ /* Implemented by aarch64_fml<f16mac1>l_lane_highv2sf. */
+ VAR1 (QUADOP_LANE, fmlal_lane_high, 0, v2sf)
+ VAR1 (QUADOP_LANE, fmlsl_lane_high, 0, v2sf)
+ /* Implemented by aarch64_fml<f16mac1>l_laneq_highv2sf. */
+ VAR1 (QUADOP_LANE, fmlal_laneq_high, 0, v2sf)
+ VAR1 (QUADOP_LANE, fmlsl_laneq_high, 0, v2sf)
+ /* Implemented by aarch64_fml<f16mac1>lq_lane_highv4sf. */
+ VAR1 (QUADOP_LANE, fmlalq_lane_high, 0, v4sf)
+ VAR1 (QUADOP_LANE, fmlslq_lane_high, 0, v4sf)
+ /* Implemented by aarch64_fml<f16mac1>lq_laneq_highv4sf. */
+ VAR1 (QUADOP_LANE, fmlalq_laneq_high, 0, v4sf)
+ VAR1 (QUADOP_LANE, fmlslq_laneq_high, 0, v4sf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 5a85f82..5b3db91 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5782,7 +5782,7 @@
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
(match_operand:V16QI 2 "register_operand" "w")]
CRYPTO_AES))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_AES"
"aes<aes_op>\\t%0.16b, %2.16b"
[(set_attr "type" "crypto_aese")]
)
@@ -5797,7 +5797,7 @@
[(set (match_operand:V16QI 0 "register_operand" "=w,w")
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
CRYPTO_AESMC))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_AES"
"aes<aesmc_op>\\t%0.16b, %1.16b"
[(set_attr "type" "crypto_aesmc")
(set_attr_alternative "enabled"
@@ -5815,7 +5815,7 @@
(unspec:SI [(match_operand:SI 1
"register_operand" "w")]
UNSPEC_SHA1H))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_SHA2"
"sha1h\\t%s0, %s1"
[(set_attr "type" "crypto_sha1_fast")]
)
@@ -5825,7 +5825,7 @@
(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
(parallel [(const_int 0)]))]
UNSPEC_SHA1H))]
- "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
+ "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
"sha1h\\t%s0, %s1"
[(set_attr "type" "crypto_sha1_fast")]
)
@@ -5835,7 +5835,7 @@
(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
(parallel [(const_int 3)]))]
UNSPEC_SHA1H))]
- "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
+ "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
"sha1h\\t%s0, %s1"
[(set_attr "type" "crypto_sha1_fast")]
)
@@ -5845,7 +5845,7 @@
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:V4SI 2 "register_operand" "w")]
UNSPEC_SHA1SU1))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_SHA2"
"sha1su1\\t%0.4s, %2.4s"
[(set_attr "type" "crypto_sha1_fast")]
)
@@ -5856,7 +5856,7 @@
(match_operand:SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
CRYPTO_SHA1))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_SHA2"
"sha1<sha1_op>\\t%q0, %s2, %3.4s"
[(set_attr "type" "crypto_sha1_slow")]
)
@@ -5867,7 +5867,7 @@
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
UNSPEC_SHA1SU0))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_SHA2"
"sha1su0\\t%0.4s, %2.4s, %3.4s"
[(set_attr "type" "crypto_sha1_xor")]
)
@@ -5880,7 +5880,7 @@
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
CRYPTO_SHA256))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_SHA2"
"sha256h<sha256_op>\\t%q0, %q2, %3.4s"
[(set_attr "type" "crypto_sha256_slow")]
)
@@ -5890,7 +5890,7 @@
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:V4SI 2 "register_operand" "w")]
UNSPEC_SHA256SU0))]
- "TARGET_SIMD &&TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_SHA2"
"sha256su0\\t%0.4s, %2.4s"
[(set_attr "type" "crypto_sha256_fast")]
)
@@ -5901,11 +5901,746 @@
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
UNSPEC_SHA256SU1))]
- "TARGET_SIMD &&TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_SHA2"
"sha256su1\\t%0.4s, %2.4s, %3.4s"
[(set_attr "type" "crypto_sha256_slow")]
)
+;; sha512
+
+(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=w")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "w")
+ (match_operand:V2DI 3 "register_operand" "w")]
+ CRYPTO_SHA512))]
+ "TARGET_SIMD && TARGET_SHA3"
+ "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
+ [(set_attr "type" "crypto_sha512")]
+)
+
+(define_insn "aarch64_crypto_sha512su0qv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=w")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "w")]
+ UNSPEC_SHA512SU0))]
+ "TARGET_SIMD && TARGET_SHA3"
+ "sha512su0\\t%0.2d, %2.2d"
+ [(set_attr "type" "crypto_sha512")]
+)
+
+(define_insn "aarch64_crypto_sha512su1qv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=w")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "w")
+ (match_operand:V2DI 3 "register_operand" "w")]
+ UNSPEC_SHA512SU1))]
+ "TARGET_SIMD && TARGET_SHA3"
+ "sha512su1\\t%0.2d, %2.2d, %3.2d"
+ [(set_attr "type" "crypto_sha512")]
+)
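
These three patterns back the new vsha512*q_u64 intrinsics declared in arm_neon.h. A minimal sketch of one round-helper call on dummy data (the function name and values are illustrative; build with -march=armv8.2-a+sha3):

  #include <arm_neon.h>

  /* One sha512h step followed by a sha512su0 schedule update.  */
  uint64x2_t
  sha512_step (uint64x2_t ab, uint64x2_t cd, uint64x2_t ef)
  {
    uint64x2_t t = vsha512hq_u64 (ab, cd, ef);  /* sha512h   */
    return vsha512su0q_u64 (t, ab);             /* sha512su0 */
  }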
+
+;; sha3
+
+(define_insn "aarch64_eor3qv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (xor:V8HI
+ (xor:V8HI
+ (match_operand:V8HI 2 "register_operand" "%w")
+ (match_operand:V8HI 3 "register_operand" "w"))
+ (match_operand:V8HI 1 "register_operand" "w")))]
+ "TARGET_SIMD && TARGET_SHA3"
+ "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
+ [(set_attr "type" "crypto_sha3")]
+)
+
+(define_insn "aarch64_rax1qv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=w")
+ (xor:V2DI
+ (rotate:V2DI
+ (match_operand:V2DI 2 "register_operand" "w")
+ (const_int 1))
+ (match_operand:V2DI 1 "register_operand" "w")))]
+ "TARGET_SIMD && TARGET_SHA3"
+ "rax1\\t%0.2d, %1.2d, %2.2d"
+ [(set_attr "type" "crypto_sha3")]
+)
+
+(define_insn "aarch64_xarqv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=w")
+ (rotatert:V2DI
+ (xor:V2DI
+ (match_operand:V2DI 1 "register_operand" "%w")
+ (match_operand:V2DI 2 "register_operand" "w"))
+ (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
+ "TARGET_SIMD && TARGET_SHA3"
+ "xar\\t%0.2d, %1.2d, %2.2d, %3"
+ [(set_attr "type" "crypto_sha3")]
+)
+
+(define_insn "aarch64_bcaxqv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (xor:V8HI
+ (and:V8HI
+ (not:V8HI (match_operand:V8HI 3 "register_operand" "w"))
+ (match_operand:V8HI 2 "register_operand" "w"))
+ (match_operand:V8HI 1 "register_operand" "w")))]
+ "TARGET_SIMD && TARGET_SHA3"
+ "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
+ [(set_attr "type" "crypto_sha3")]
+)
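
Note that eor3, rax1, xar and bcax are modelled with plain xor/and/rotate RTL rather than unspecs, so combine can form them too. A sketch using the matching new intrinsics (function names and data are illustrative; -march=armv8.2-a+sha3):

  #include <arm_neon.h>

  uint64x2_t
  theta_like (uint64x2_t a, uint64x2_t b)
  {
    uint64x2_t r = vrax1q_u64 (a, b);  /* a ^ rol64 (b, 1) */
    return vxarq_u64 (r, a, 17);       /* ror64 (r ^ a, 17); 17 is arbitrary */
  }

  uint16x8_t
  chi_like (uint16x8_t a, uint16x8_t b, uint16x8_t c)
  {
    /* veor3q: a ^ b ^ c; vbcaxq: x ^ (y & ~z).  */
    return vbcaxq_u16 (veor3q_u16 (a, b, c), b, c);
  }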
+
+;; SM3
+
+(define_insn "aarch64_sm3ss1qv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
+ (match_operand:V4SI 2 "register_operand" "w")
+ (match_operand:V4SI 3 "register_operand" "w")]
+ UNSPEC_SM3SS1))]
+ "TARGET_SIMD && TARGET_SM4"
+ "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
+ [(set_attr "type" "crypto_sm3")]
+)
+
+
+(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "register_operand" "w")
+ (match_operand:V4SI 3 "register_operand" "w")
+ (match_operand:SI 4 "aarch64_imm2" "Ui2")]
+ CRYPTO_SM3TT))]
+ "TARGET_SIMD && TARGET_SM4"
+ "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
+ [(set_attr "type" "crypto_sm3")]
+)
+
+(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "register_operand" "w")
+ (match_operand:V4SI 3 "register_operand" "w")]
+ CRYPTO_SM3PART))]
+ "TARGET_SIMD && TARGET_SM4"
+ "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
+ [(set_attr "type" "crypto_sm3")]
+)
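
The sm3tt patterns take a two-bit immediate (Ui2) selecting the lane. A sketch chaining the new SM3 intrinsics on dummy data (illustrative only; -march=armv8.2-a+sm4):

  #include <arm_neon.h>

  uint32x4_t
  sm3_quarter_round (uint32x4_t x, uint32x4_t y, uint32x4_t z)
  {
    uint32x4_t ss1 = vsm3ss1q_u32 (x, y, z);        /* sm3ss1 */
    uint32x4_t tt1 = vsm3tt1aq_u32 (x, ss1, z, 0);  /* sm3tt1a, lane 0..3 */
    return vsm3partw1q_u32 (tt1, y, z);             /* sm3partw1 */
  }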
+
+;; SM4
+
+(define_insn "aarch64_sm4eqv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "register_operand" "w")]
+ UNSPEC_SM4E))]
+ "TARGET_SIMD && TARGET_SM4"
+ "sm4e\\t%0.4s, %2.4s"
+ [(set_attr "type" "crypto_sm4")]
+)
+
+(define_insn "aarch64_sm4ekeyqv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
+ (match_operand:V4SI 2 "register_operand" "w")]
+ UNSPEC_SM4EKEY))]
+ "TARGET_SIMD && TARGET_SM4"
+ "sm4ekey\\t%0.4s, %1.4s, %2.4s"
+ [(set_attr "type" "crypto_sm4")]
+)
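
A matching sketch for the SM4 round and key-schedule intrinsics (the constant and data are placeholders; -march=armv8.2-a+sm4):

  #include <arm_neon.h>

  /* One SM4 round plus a key-schedule step.  */
  uint32x4_t
  sm4_step (uint32x4_t state, uint32x4_t rkey)
  {
    uint32x4_t ck = vdupq_n_u32 (0x00070e15);       /* placeholder constant */
    uint32x4_t next_rk = vsm4ekeyq_u32 (rkey, ck);  /* sm4ekey */
    return vsm4eq_u32 (state, next_rk);             /* sm4e */
  }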
+
+;; fp16fml
+
+(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
+ [(set (match_operand:VDQSF 0 "register_operand" "=w")
+ (unspec:VDQSF
+ [(match_operand:VDQSF 1 "register_operand" "0")
+ (match_operand:<VFMLA_W> 2 "register_operand" "w")
+ (match_operand:<VFMLA_W> 3 "register_operand" "w")]
+ VFMLA16_LOW))]
+ "TARGET_F16FML"
+{
+ int nunits = GET_MODE_NUNITS (<VFMLA_W>mode);
+ rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, nunits, false);
+ rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, nunits, false);
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, p2));
+ DONE;
+
+})
+
+(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
+ [(set (match_operand:VDQSF 0 "register_operand" "=w")
+ (unspec:VDQSF
+ [(match_operand:VDQSF 1 "register_operand" "0")
+ (match_operand:<VFMLA_W> 2 "register_operand" "w")
+ (match_operand:<VFMLA_W> 3 "register_operand" "w")]
+ VFMLA16_HIGH))]
+ "TARGET_F16FML"
+{
+ int nunits = GET_MODE_NUNITS (<VFMLA_W>mode);
+ rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, nunits, true);
+ rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, nunits, true);
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, p2));
+ DONE;
+})
+
+(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
+ [(set (match_operand:VDQSF 0 "register_operand" "=w")
+ (fma:VDQSF
+ (float_extend:VDQSF
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 2 "register_operand" "w")
+ (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
+ (float_extend:VDQSF
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 3 "register_operand" "w")
+ (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
+ (match_operand:VDQSF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
+ [(set (match_operand:VDQSF 0 "register_operand" "=w")
+ (fma:VDQSF
+ (float_extend:VDQSF
+ (neg:<VFMLA_SEL_W>
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 2 "register_operand" "w")
+ (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
+ (float_extend:VDQSF
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 3 "register_operand" "w")
+ (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
+ (match_operand:VDQSF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
+ [(set (match_operand:VDQSF 0 "register_operand" "=w")
+ (fma:VDQSF
+ (float_extend:VDQSF
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 2 "register_operand" "w")
+ (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
+ (float_extend:VDQSF
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 3 "register_operand" "w")
+ (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
+ (match_operand:VDQSF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
+ [(set (match_operand:VDQSF 0 "register_operand" "=w")
+ (fma:VDQSF
+ (float_extend:VDQSF
+ (neg:<VFMLA_SEL_W>
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 2 "register_operand" "w")
+ (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
+ (float_extend:VDQSF
+ (vec_select:<VFMLA_SEL_W>
+ (match_operand:<VFMLA_W> 3 "register_operand" "w")
+ (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
+ (match_operand:VDQSF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
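
The expanders above select the low or high half of the half-float inputs with parallel constants; the insns then emit fmlal/fmlsl for the low half and fmlal2/fmlsl2 for the high half. In this commit the user-facing intrinsics carry a _u32 suffix (see the arm_neon.h entries in the ChangeLog). A sketch on dummy data (-march=armv8.2-a+fp16fml):

  #include <arm_neon.h>

  /* acc += widen (lo half of a) * widen (lo half of b),
     then the same for the high halves.  */
  float32x2_t
  fma_widen (float32x2_t acc, float16x4_t a, float16x4_t b)
  {
    acc = vfmlal_low_u32 (acc, a, b);    /* fmlal  */
    return vfmlal_high_u32 (acc, a, b);  /* fmlal2 */
  }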
+
+(define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
+ (match_operand:V4HF 2 "register_operand" "")
+ (match_operand:V4HF 3 "register_operand" "")
+ (match_operand:SI 4 "aarch64_imm2" "")]
+ VFMLA16_LOW))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
+ GET_MODE_NUNITS (V4HFmode),
+ false);
+ rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+}
+)
+
+(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
+ (match_operand:V4HF 2 "register_operand" "")
+ (match_operand:V4HF 3 "register_operand" "")
+ (match_operand:SI 4 "aarch64_imm2" "")]
+ VFMLA16_HIGH))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
+ GET_MODE_NUNITS (V4HFmode),
+ true);
+ rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+})
+
+(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (neg:V2HF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlal_lane_highv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (neg:V2HF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
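
The 64-bit lane forms restrict the lane index to 0..3 (aarch64_imm2). A sketch (illustrative; -march=armv8.2-a+fp16fml):

  #include <arm_neon.h>

  /* Multiply the low half of a by lane 2 of b, widening into acc.  */
  float32x2_t
  fma_lane (float32x2_t acc, float16x4_t a, float16x4_t b)
  {
    return vfmlal_lane_low_u32 (acc, a, b, 2);  /* fmlal ... %3.h[2] */
  }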
+
+(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V8HF 2 "register_operand" "")
+ (match_operand:V8HF 3 "register_operand" "")
+ (match_operand:SI 4 "aarch64_lane_imm3" "")]
+ VFMLA16_LOW))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
+ GET_MODE_NUNITS (V8HFmode),
+ false);
+ rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+})
+
+(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V8HF 2 "register_operand" "")
+ (match_operand:V8HF 3 "register_operand" "")
+ (match_operand:SI 4 "aarch64_lane_imm3" "")]
+ VFMLA16_HIGH))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
+ GET_MODE_NUNITS (V8HFmode),
+ true);
+
+ rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+})
+
+(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (neg:V4HF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (neg:V4HF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
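
The quad laneq forms allow the full 0..7 index range enforced by aarch64_lane_imm3/Ui7. A sketch (illustrative; -march=armv8.2-a+fp16fml):

  #include <arm_neon.h>

  /* 128-bit accumulator, subtracting form, high halves, lane 7.  */
  float32x4_t
  fms_laneq (float32x4_t acc, float16x8_t a, float16x8_t b)
  {
    return vfmlslq_laneq_high_u32 (acc, a, b, 7);  /* fmlsl2 ... %3.h[7] */
  }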
+
+(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
+ (match_operand:V4HF 2 "register_operand" "")
+ (match_operand:V8HF 3 "register_operand" "")
+ (match_operand:SI 4 "aarch64_lane_imm3" "")]
+ VFMLA16_LOW))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
+ GET_MODE_NUNITS (V4HFmode),
+ false);
+ rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+
+})
+
+(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
+ (match_operand:V4HF 2 "register_operand" "")
+ (match_operand:V8HF 3 "register_operand" "")
+ (match_operand:SI 4 "aarch64_lane_imm3" "")]
+ VFMLA16_HIGH))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode,
+ GET_MODE_NUNITS (V4HFmode),
+ true);
+ rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+
+})
+
+(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (neg:V2HF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=w")
+ (fma:V2SF
+ (float_extend:V2SF
+ (neg:V2HF
+ (vec_select:V2HF
+ (match_operand:V4HF 2 "register_operand" "w")
+ (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
+ (float_extend:V2SF
+ (vec_duplicate:V2HF
+ (vec_select:HF
+ (match_operand:V8HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V8HF 2 "register_operand" "")
+ (match_operand:V4HF 3 "register_operand" "")
+ (match_operand:SI 4 "aarch64_imm2" "")]
+ VFMLA16_LOW))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
+ GET_MODE_NUNITS (V8HFmode),
+ false);
+
+ rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+})
+
+(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V8HF 2 "register_operand" "")
+ (match_operand:V4HF 3 "register_operand" "")
+ (match_operand:SI 4 "aarch64_imm2" "")]
+ VFMLA16_HIGH))]
+ "TARGET_F16FML"
+{
+ rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode,
+ GET_MODE_NUNITS (V8HFmode),
+ true);
+ rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
+
+ emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
+ operands[1],
+ operands[2],
+ operands[3],
+ p1, lane));
+ DONE;
+})
+
+(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (neg:V4HF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
+(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (fma:V4SF
+ (float_extend:V4SF
+ (neg:V4HF
+ (vec_select:V4HF
+ (match_operand:V8HF 2 "register_operand" "w")
+ (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
+ (float_extend:V4SF
+ (vec_duplicate:V4HF
+ (vec_select:HF
+ (match_operand:V4HF 3 "register_operand" "x")
+ (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
+ (match_operand:V4SF 1 "register_operand" "0")))]
+ "TARGET_F16FML"
+ "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
+ [(set_attr "type" "neon_fp_mul_s")]
+)
+
;; pmull
(define_insn "aarch64_crypto_pmulldi"
@@ -5913,7 +6648,7 @@
(unspec:TI [(match_operand:DI 1 "register_operand" "w")
(match_operand:DI 2 "register_operand" "w")]
UNSPEC_PMULL))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_AES"
"pmull\\t%0.1q, %1.1d, %2.1d"
[(set_attr "type" "crypto_pmull")]
)
@@ -5923,7 +6658,7 @@
(unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
(match_operand:V2DI 2 "register_operand" "w")]
UNSPEC_PMULL2))]
- "TARGET_SIMD && TARGET_CRYPTO"
+ "TARGET_SIMD && TARGET_AES"
"pmull2\\t%0.1q, %1.2d, %2.2d"
[(set_attr "type" "crypto_pmull")]
)
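
For reference, the widening arithmetic the fmlal/fmlsl patterns above describe can be modelled elementwise in scalar C. A minimal sketch, assuming __fp16 is available (the model is illustrative and is not part of the patch):

#include <math.h>

/* Model of the non-lane "low" form on the .2s variant: each fp16
   element is widened to fp32 and combined with a single fused
   multiply-add.  The "high"/fmlal2 forms read elements 2..3 instead,
   and the lane forms replace b[i] with a broadcast b[lane].  */
static void
model_vfmlal_low (float r[2], const __fp16 a[4], const __fp16 b[4])
{
  for (int i = 0; i < 2; i++)
    r[i] = fmaf ((float) a[i], (float) b[i], r[i]);
}

For fmlsl the product operand is negated before the accumulation, matching the (neg:...) wrapper around the vec_select in the patterns above.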
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index c60b5f4..82412e8 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -148,6 +148,14 @@ extern unsigned aarch64_architecture_version;
#define AARCH64_FL_V8_3 (1 << 10) /* Has ARMv8.3-A features. */
#define AARCH64_FL_RCPC (1 << 11) /* Has support for RCpc model. */
#define AARCH64_FL_DOTPROD (1 << 12) /* Has ARMv8.2-A Dot Product ins. */
+/* New flags to split crypto into aes and sha2. */
+#define AARCH64_FL_AES (1 << 13) /* Has Crypto AES. */
+#define AARCH64_FL_SHA2 (1 << 14) /* Has Crypto SHA2. */
+/* ARMv8.4-A architecture extensions. */
+#define AARCH64_FL_V8_4 (1 << 15) /* Has ARMv8.4-A features. */
+#define AARCH64_FL_SM4 (1 << 16) /* Has ARMv8.4-A SM3 and SM4. */
+#define AARCH64_FL_SHA3	  (1 << 17)  /* Has ARMv8.4-A SHA3 and SHA512.  */
+#define AARCH64_FL_F16FML	  (1 << 18)  /* Has ARMv8.4-A FP16 extensions.  */
/* Has FP and SIMD. */
#define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD)
@@ -164,6 +172,8 @@ extern unsigned aarch64_architecture_version;
(AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2)
#define AARCH64_FL_FOR_ARCH8_3 \
(AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3)
+#define AARCH64_FL_FOR_ARCH8_4 \
+ (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4 | AARCH64_FL_F16FML)
/* Macros to test ISA flags. */
@@ -177,10 +187,31 @@ extern unsigned aarch64_architecture_version;
#define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16)
#define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3)
#define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD)
+#define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES)
+#define AARCH64_ISA_SHA2 (aarch64_isa_flags & AARCH64_FL_SHA2)
+#define AARCH64_ISA_V8_4 (aarch64_isa_flags & AARCH64_FL_V8_4)
+#define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4)
+#define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3)
+#define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML)
/* Crypto is an optional extension to AdvSIMD. */
#define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO)
+/* SHA2 is an optional extension to AdvSIMD. */
+#define TARGET_SHA2 ((TARGET_SIMD && AARCH64_ISA_SHA2) || TARGET_CRYPTO)
+
+/* SHA3 is an optional extension to AdvSIMD. */
+#define TARGET_SHA3 (TARGET_SIMD && AARCH64_ISA_SHA3)
+
+/* AES is an optional extension to AdvSIMD. */
+#define TARGET_AES ((TARGET_SIMD && AARCH64_ISA_AES) || TARGET_CRYPTO)
+
+/* SM4 is an optional extension to AdvSIMD.  */
+#define TARGET_SM4 (TARGET_SIMD && AARCH64_ISA_SM4)
+
+/* FP16FML is an optional extension to AdvSIMD. */
+#define TARGET_F16FML (TARGET_SIMD && AARCH64_ISA_F16FML && TARGET_FP_F16INST)
+
/* CRC instructions that can be enabled through +crc arch extension. */
#define TARGET_CRC32 (AARCH64_ISA_CRC)
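
The intent of the TARGET_AES/TARGET_SHA2 definitions above is backward compatibility: the legacy +crypto modifier keeps both features enabled even though it does not set the new per-feature bits. A simplified sketch of that flag logic (names shortened; this is not the actual option-handling code):

#include <stdio.h>

#define FL_CRYPTO (1u << 0)   /* legacy +crypto       */
#define FL_AES    (1u << 1)   /* new split +aes bit   */
#define FL_SHA2   (1u << 2)   /* new split +sha2 bit  */

/* Mirrors TARGET_AES: true for +aes or for legacy +crypto.  */
static int
has_aes (unsigned flags)
{
  return (flags & FL_AES) || (flags & FL_CRYPTO);
}

int
main (void)
{
  printf ("%d %d\n", has_aes (FL_CRYPTO), has_aes (FL_AES)); /* prints: 1 1 */
  return 0;
}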
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index e45ade4..c45c29a 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -31970,6 +31970,322 @@ vdotq_laneq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b, const int __index)
}
#pragma GCC pop_options
+#pragma GCC push_options
+#pragma GCC target(("arch=armv8.2-a+sm4"))
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsm3ss1q_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
+{
+ return __builtin_aarch64_sm3ss1qv4si_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsm3tt1aq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c, const int __imm2)
+{
+ return __builtin_aarch64_sm3tt1aqv4si_uuuus (__a, __b, __c, __imm2);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsm3tt1bq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c, const int __imm2)
+{
+ return __builtin_aarch64_sm3tt1bqv4si_uuuus (__a, __b, __c, __imm2);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsm3tt2aq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c, const int __imm2)
+{
+ return __builtin_aarch64_sm3tt2aqv4si_uuuus (__a, __b, __c, __imm2);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsm3tt2bq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c, const int __imm2)
+{
+ return __builtin_aarch64_sm3tt2bqv4si_uuuus (__a, __b, __c, __imm2);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsm3partw1q_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
+{
+ return __builtin_aarch64_sm3partw1qv4si_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsm3partw2q_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
+{
+ return __builtin_aarch64_sm3partw2qv4si_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsm4eq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return __builtin_aarch64_sm4eqv4si_uuu (__a, __b);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsm4ekeyq_u32 (uint32x4_t __a, uint32x4_t __b)
+{
+ return __builtin_aarch64_sm4ekeyqv4si_uuu (__a, __b);
+}
+
+#pragma GCC pop_options
+
+#pragma GCC push_options
+#pragma GCC target(("arch=armv8.2-a+crypto"))
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsha512hq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
+{
+ return __builtin_aarch64_crypto_sha512hqv2di_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsha512h2q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
+{
+ return __builtin_aarch64_crypto_sha512h2qv2di_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsha512su0q_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+ return __builtin_aarch64_crypto_sha512su0qv2di_uuu (__a, __b);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsha512su1q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
+{
+ return __builtin_aarch64_crypto_sha512su1qv2di_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline uint16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
+{
+ return __builtin_aarch64_eor3qv8hi_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vrax1q_u64 (uint64x2_t __a, uint64x2_t __b)
+{
+ return __builtin_aarch64_rax1qv2di_uuu (__a, __b);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vxarq_u64 (uint64x2_t __a, uint64x2_t __b, const int __imm6)
+{
+ return __builtin_aarch64_xarqv2di_uuus (__a, __b, __imm6);
+}
+
+__extension__ extern __inline uint16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
+{
+ return __builtin_aarch64_bcaxqv8hi_uuuu (__a, __b, __c);
+}
+#pragma GCC pop_options
+
+#pragma GCC push_options
+#pragma GCC target(("arch=armv8.2-a+fp16fml"))
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlal_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_fmlal_lowv2sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlsl_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_fmlsl_lowv2sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlalq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_fmlalq_lowv4sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlslq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_fmlslq_lowv4sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlal_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_fmlal_highv2sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlsl_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+{
+ return __builtin_aarch64_fmlsl_highv2sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlalq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_fmlalq_highv4sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlslq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+{
+ return __builtin_aarch64_fmlslq_highv4sf (__r, __a, __b);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlal_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlal_lane_lowv2sf (__r, __a, __b, __lane);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlsl_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlsl_lane_lowv2sf (__r, __a, __b, __lane);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlal_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlal_laneq_lowv2sf (__r, __a, __b, __lane);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlsl_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlsl_laneq_lowv2sf (__r, __a, __b, __lane);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlalq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlalq_lane_lowv4sf (__r, __a, __b, __lane);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlslq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlslq_lane_lowv4sf (__r, __a, __b, __lane);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlalq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlalq_laneq_lowv4sf (__r, __a, __b, __lane);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlslq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlslq_laneq_lowv4sf (__r, __a, __b, __lane);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlal_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlal_lane_highv2sf (__r, __a, __b, __lane);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlsl_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlsl_lane_highv2sf (__r, __a, __b, __lane);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlal_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlal_laneq_highv2sf (__r, __a, __b, __lane);
+}
+
+__extension__ extern __inline float32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlsl_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlsl_laneq_highv2sf (__r, __a, __b, __lane);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlalq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlalq_lane_highv4sf (__r, __a, __b, __lane);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlslq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlslq_lane_highv4sf (__r, __a, __b, __lane);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlalq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlalq_laneq_highv4sf (__r, __a, __b, __lane);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vfmlslq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+ const int __lane)
+{
+ return __builtin_aarch64_fmlslq_laneq_highv4sf (__r, __a, __b, __lane);
+}
+
+#pragma GCC pop_options
+
#undef __aarch64_vget_lane_any
#undef __aarch64_vdup_lane_any
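
A short usage sketch for the new fp16fml intrinsics (illustrative only; compile with something like -march=armv8.2-a+fp16fml): accumulating all eight half-precision products of two float16x8_t vectors into four fp32 lanes takes one low step and one high step.

#include <arm_neon.h>

/* The low form consumes elements 0..3, the high form elements 4..7,
   so together they fold a full 8-element fp16 product into r.  */
float32x4_t
acc_all_eight (float32x4_t r, float16x8_t a, float16x8_t b)
{
  r = vfmlalq_low_u32 (r, a, b);
  r = vfmlalq_high_u32 (r, a, b);
  return r;
}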
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 3ca7ec7..18adbc6 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -139,12 +139,24 @@
A constraint that matches the immediate constant +1."
(match_test "op == const1_rtx"))
+(define_constraint "Ui2"
+ "@internal
+ A constraint that matches the integers 0...3."
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT) ival <= 3")))
+
(define_constraint "Ui3"
"@internal
A constraint that matches the integers 0...4."
(and (match_code "const_int")
(match_test "(unsigned HOST_WIDE_INT) ival <= 4")))
+(define_constraint "Ui7"
+ "@internal
+ A constraint that matches the integers 0...7."
+ (and (match_code "const_int")
+ (match_test "(unsigned HOST_WIDE_INT) ival <= 7")))
+
(define_constraint "Up3"
"@internal
A constraint that matches the integers 2^(0...4)."
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b5e9f37..e199dfd 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -361,6 +361,23 @@
UNSPEC_FMINNM ; Used in aarch64-simd.md.
UNSPEC_SDOT ; Used in aarch64-simd.md.
UNSPEC_UDOT ; Used in aarch64-simd.md.
+ UNSPEC_SM3SS1 ; Used in aarch64-simd.md.
+ UNSPEC_SM3TT1A ; Used in aarch64-simd.md.
+ UNSPEC_SM3TT1B ; Used in aarch64-simd.md.
+ UNSPEC_SM3TT2A ; Used in aarch64-simd.md.
+ UNSPEC_SM3TT2B ; Used in aarch64-simd.md.
+ UNSPEC_SM3PARTW1 ; Used in aarch64-simd.md.
+ UNSPEC_SM3PARTW2 ; Used in aarch64-simd.md.
+ UNSPEC_SM4E ; Used in aarch64-simd.md.
+ UNSPEC_SM4EKEY ; Used in aarch64-simd.md.
+ UNSPEC_SHA512H ; Used in aarch64-simd.md.
+ UNSPEC_SHA512H2 ; Used in aarch64-simd.md.
+ UNSPEC_SHA512SU0 ; Used in aarch64-simd.md.
+ UNSPEC_SHA512SU1 ; Used in aarch64-simd.md.
+ UNSPEC_FMLAL ; Used in aarch64-simd.md.
+ UNSPEC_FMLSL ; Used in aarch64-simd.md.
+ UNSPEC_FMLAL2 ; Used in aarch64-simd.md.
+ UNSPEC_FMLSL2 ; Used in aarch64-simd.md.
])
;; ------------------------------------------------------------------
@@ -843,6 +860,15 @@
;; No need of iterator for -fPIC as it use got_lo12 for both modes.
(define_mode_attr got_modifier [(SI "gotpage_lo14") (DI "gotpage_lo15")])
+;; Width of 2nd and 3rd arguments to fp16 vector multiply add/sub
+(define_mode_attr VFMLA_W [(V2SF "V4HF") (V4SF "V8HF")])
+
+(define_mode_attr VFMLA_SEL_W [(V2SF "V2HF") (V4SF "V4HF")])
+
+(define_mode_attr f16quad [(V2SF "") (V4SF "q")])
+
+(define_code_attr f16mac [(plus "a") (minus "s")])
+
;; -------------------------------------------------------------------
;; Code Iterators
;; -------------------------------------------------------------------
@@ -1140,6 +1166,19 @@
(define_int_iterator CRYPTO_SHA256 [UNSPEC_SHA256H UNSPEC_SHA256H2])
+(define_int_iterator CRYPTO_SHA512 [UNSPEC_SHA512H UNSPEC_SHA512H2])
+
+(define_int_iterator CRYPTO_SM3TT [UNSPEC_SM3TT1A UNSPEC_SM3TT1B
+ UNSPEC_SM3TT2A UNSPEC_SM3TT2B])
+
+(define_int_iterator CRYPTO_SM3PART [UNSPEC_SM3PARTW1 UNSPEC_SM3PARTW2])
+
+;; Iterators for fp16 operations
+
+(define_int_iterator VFMLA16_LOW [UNSPEC_FMLAL UNSPEC_FMLSL])
+
+(define_int_iterator VFMLA16_HIGH [UNSPEC_FMLAL2 UNSPEC_FMLSL2])
+
;; Iterators for atomic operations.
(define_int_iterator ATOMIC_LDOP
@@ -1312,3 +1351,13 @@
(define_int_attr sha256_op [(UNSPEC_SHA256H "") (UNSPEC_SHA256H2 "2")])
(define_int_attr rdma_as [(UNSPEC_SQRDMLAH "a") (UNSPEC_SQRDMLSH "s")])
+
+(define_int_attr sha512_op [(UNSPEC_SHA512H "") (UNSPEC_SHA512H2 "2")])
+
+(define_int_attr sm3tt_op [(UNSPEC_SM3TT1A "1a") (UNSPEC_SM3TT1B "1b")
+ (UNSPEC_SM3TT2A "2a") (UNSPEC_SM3TT2B "2b")])
+
+(define_int_attr sm3part_op [(UNSPEC_SM3PARTW1 "1") (UNSPEC_SM3PARTW2 "2")])
+
+(define_int_attr f16mac1 [(UNSPEC_FMLAL "a") (UNSPEC_FMLSL "s")
+ (UNSPEC_FMLAL2 "a") (UNSPEC_FMLSL2 "s")])
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 10259c0..65b2df6 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -160,6 +160,18 @@
(and (match_code "const_int")
(match_test "(unsigned HOST_WIDE_INT) INTVAL (op) <= 4")))
+;; The imm2 field is a 2-bit field that only accepts immediates in the
+;; range 0..3.
+(define_predicate "aarch64_imm2"
+ (and (match_code "const_int")
+ (match_test "UINTVAL (op) <= 3")))
+
+;; The imm3 field is a 3-bit field that only accepts immediates in the
+;; range 0..7.
+(define_predicate "aarch64_lane_imm3"
+ (and (match_code "const_int")
+ (match_test "UINTVAL (op) <= 7")))
+
;; An immediate that fits into 24 bits.
(define_predicate "aarch64_imm24"
(and (match_code "const_int")
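
The two new predicates encode the architectural lane ranges for the by-lane fmlal/fmlsl forms: an fp16 element index into a 64-bit vector (4 halves, 0..3) or a 128-bit vector (8 halves, 0..7), matching the Ui2 and Ui7 constraints added earlier. The equivalent check in plain C, for illustration only:

/* Lane-index validity as enforced by aarch64_imm2 / aarch64_lane_imm3.  */
static int lane_ok_v4hf (long lane) { return lane >= 0 && lane <= 3; }
static int lane_ok_v8hf (long lane) { return lane >= 0 && lane <= 7; }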
diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
index 11eb207..055cb3e 100644
--- a/gcc/config/arm/types.md
+++ b/gcc/config/arm/types.md
@@ -1084,6 +1084,10 @@
crypto_sha256_fast,\
crypto_sha256_slow,\
crypto_pmull,\
+ crypto_sha512,\
+ crypto_sha3,\
+ crypto_sm3,\
+ crypto_sm4,\
coproc"
(const_string "untyped"))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 928561d..e4768b8 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -14481,7 +14481,11 @@ more feature modifiers. This option has the form
@option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}.
The permissible values for @var{arch} are @samp{armv8-a},
-@samp{armv8.1-a}, @samp{armv8.2-a}, @samp{armv8.3-a} or @var{native}.
+@samp{armv8.1-a}, @samp{armv8.2-a}, @samp{armv8.3-a}, @samp{armv8.4-a}
+or @var{native}.
+
+The value @samp{armv8.4-a} implies @samp{armv8.3-a} and enables compiler
+support for the ARMv8.4-A architecture extensions.
The value @samp{armv8.3-a} implies @samp{armv8.2-a} and enables compiler
support for the ARMv8.3-A architecture extensions.
@@ -14619,18 +14623,34 @@ Enable Round Double Multiply Accumulate instructions. This is on by default
for @option{-march=armv8.1-a}.
@item fp16
Enable FP16 extension. This also enables floating-point instructions.
+@item fp16fml
+Enable FP16 fmla extension. This also enables FP16 extensions and
+floating-point instructions. This option is enabled by default for
+@option{-march=armv8.4-a}. Use of this option with architectures prior to
+Armv8.2-A is not supported.
+
@item rcpc
Enable the RcPc extension. This does not change code generation from GCC,
but is passed on to the assembler, enabling inline asm statements to use
instructions from the RcPc extension.
@item dotprod
Enable the Dot Product extension. This also enables Advanced SIMD instructions.
+@item aes
+Enable the Armv8-a AES and PMULL crypto extension. This also enables
+Advanced SIMD instructions.
+@item sha2
+Enable the Armv8-a SHA2 crypto extension. This also enables Advanced SIMD
+instructions.
+@item sha3
+Enable the SHA512 and SHA3 crypto extension. This also enables Advanced
+SIMD instructions. Use of this option with architectures prior to
+Armv8.2-A is not supported.
+@item sm4
+Enable the SM3 and SM4 crypto extension. This also enables Advanced SIMD
+instructions. Use of this option with architectures prior to Armv8.2-A is
+not supported.
@end table
-Feature @option{crypto} implies @option{simd}, which implies @option{fp}.
+Feature @option{crypto} implies @option{aes}, @option{sha2}, and @option{simd},
+which implies @option{fp}.
Conversely, @option{nofp} implies @option{nosimd}, which implies
-@option{nocrypto}.
+@option{nocrypto}, @option{noaes} and @option{nosha2}.
@node Adapteva Epiphany Options
@subsection Adapteva Epiphany Options
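
Code that wants to stay portable across these -march levels can key on the corresponding feature-test macros; the fp16fml one, __ARM_FEATURE_FP16_FML, is defined by this patch. A hedged sketch:

#include <arm_neon.h>

#if defined (__ARM_FEATURE_FP16_FML)
/* Compiled only when fp16fml is available, e.g. with
   -march=armv8.2-a+fp16fml or -march=armv8.4-a.  */
float32x2_t
widen_mla (float32x2_t r, float16x4_t a, float16x4_t b)
{
  return vfmlal_low_u32 (r, a, b);
}
#endif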
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8572bfc..209d6d8 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,37 @@
+2018-01-10 Michael Collison <michael.collison@arm.com>
+
+ gcc.target/aarch64/fp16_fmul_high_1.c: New testcase.
+ gcc.target/aarch64/fp16_fmul_high_2.c: New testcase.
+ gcc.target/aarch64/fp16_fmul_high_3.c: New testcase.
+ gcc.target/aarch64/fp16_fmul_high.h: New shared testcase.
+ gcc.target/aarch64/fp16_fmul_lane_high_1.c: New testcase.
+ gcc.target/aarch64/fp16_fmul_lane_high_2.c: New testcase.
+ gcc.target/aarch64/fp16_fmul_lane_high_3.c: New testcase.
+ gcc.target/aarch64/fp16_fmul_lane_high.h: New shared testcase.
+ gcc.target/aarch64/fp16_fmul_low_1.c: New testcase.
+ gcc.target/aarch64/fp16_fmul_low_2.c: New testcase.
+ gcc.target/aarch64/fp16_fmul_low_3.c: New testcase.
+ gcc.target/aarch64/fp16_fmul_low.h: New shared testcase.
+ gcc.target/aarch64/fp16_fmul_lane_low_1.c: New testcase.
+ gcc.target/aarch64/fp16_fmul_lane_low_2.c: New testcase.
+ gcc.target/aarch64/fp16_fmul_lane_low_3.c: New testcase.
+ gcc.target/aarch64/fp16_fmul_lane_low.h: New shared testcase.
+
+2018-01-10 Michael Collison <michael.collison@arm.com>
+
+ gcc.target/aarch64/sha2.h: New shared testcase.
+ gcc.target/aarch64/sha2_1.c: New testcase.
+ gcc.target/aarch64/sha2_2.c: New testcase.
+ gcc.target/aarch64/sha2_3.c: New testcase.
+ gcc.target/aarch64/sha3.h: New shared testcase.
+ gcc.target/aarch64/sha3_1.c: New testcase.
+ gcc.target/aarch64/sha3_2.c: New testcase.
+ gcc.target/aarch64/sha3_3.c: New testcase.
+
+2018-01-10 Michael Collison <michael.collison@arm.com>
+
+ gcc.target/aarch64/sm3_sm4.c: New testcase.
+
2018-01-10 Martin Sebor <msebor@redhat.com>
PR tree-optimization/83781
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h
new file mode 100644
index 0000000..9c86bd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h
@@ -0,0 +1,25 @@
+#include "arm_neon.h"
+
+float32x2_t
+test_vfmlal_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+{
+ return vfmlal_high_u32 (r, a, b);
+}
+
+float32x4_t
+test_vfmlalq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+{
+ return vfmlalq_high_u32 (r, a, b);
+}
+
+float32x2_t
+test_vfmlsl_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+{
+ return vfmlsl_high_u32 (r, a, b);
+}
+
+float32x4_t
+test_vfmlslq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+{
+ return vfmlslq_high_u32 (r, a, b);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high_1.c b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high_1.c
new file mode 100644
index 0000000..26cf219f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high_1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.2-a+fp16fml" } */
+
+#include "fp16_fmul_high.h"
+
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.2h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.2h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high_2.c b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high_2.c
new file mode 100644
index 0000000..a82938d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high_2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.3-a+fp16fml" } */
+
+#include "fp16_fmul_high.h"
+
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.2h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.2h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high_3.c b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high_3.c
new file mode 100644
index 0000000..be1d35a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high_3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.4-a+fp16" } */
+
+#include "fp16_fmul_high.h"
+
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.2h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.2h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h
new file mode 100644
index 0000000..1039347
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h
@@ -0,0 +1,49 @@
+#include "arm_neon.h"
+
+float32x2_t
+test_vfmlal_lane_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+{
+ return vfmlal_lane_high_u32 (r, a, b, 0);
+}
+
+float32x2_t
+test_vfmlsl_lane_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+{
+ return vfmlsl_lane_high_u32 (r, a, b, 0);
+}
+
+float32x2_t
+test_vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+{
+ return vfmlal_laneq_high_u32 (r, a, b, 6);
+}
+
+float32x2_t
+test_vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+{
+ return vfmlsl_laneq_high_u32 (r, a, b, 6);
+}
+
+float32x4_t
+test_vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+{
+ return vfmlalq_lane_high_u32 (r, a, b, 1);
+}
+
+float32x4_t
+test_vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+{
+ return vfmlslq_lane_high_u32 (r, a, b, 1);
+}
+
+float32x4_t
+test_vfmlalq_laneq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+{
+ return vfmlalq_laneq_high_u32 (r, a, b, 7);
+}
+
+float32x4_t
+test_vfmlslq_laneq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+{
+ return vfmlslq_laneq_high_u32 (r, a, b, 7);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high_1.c b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high_1.c
new file mode 100644
index 0000000..30be8d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high_1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.2-a+fp16fml" } */
+
+#include "fp16_fmul_lane_high.h"
+
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[6\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[6\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[7\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[7\\\]" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high_2.c b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high_2.c
new file mode 100644
index 0000000..b6920a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high_2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.3-a+fp16fml" } */
+
+#include "fp16_fmul_lane_high.h"
+
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[6\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[6\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[7\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[7\\\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high_3.c b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high_3.c
new file mode 100644
index 0000000..65a35e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high_3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.4-a+fp16" } */
+
+#include "fp16_fmul_lane_high.h"
+
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[6\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[6\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[7\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[7\\\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h
new file mode 100644
index 0000000..b689741
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h
@@ -0,0 +1,49 @@
+#include "arm_neon.h"
+
+float32x2_t
+test_vfmlal_lane_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+{
+ return vfmlal_lane_low_u32 (r, a, b, 0);
+}
+
+float32x2_t
+test_vfmlsl_lane_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+{
+ return vfmlsl_lane_low_u32 (r, a, b, 0);
+}
+
+float32x2_t
+test_vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+{
+ return vfmlal_laneq_low_u32 (r, a, b, 6);
+}
+
+float32x2_t
+test_vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+{
+ return vfmlsl_laneq_low_u32 (r, a, b, 6);
+}
+
+float32x4_t
+test_vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+{
+ return vfmlalq_lane_low_u32 (r, a, b, 1);
+}
+
+float32x4_t
+test_vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+{
+ return vfmlslq_lane_low_u32 (r, a, b, 1);
+}
+
+float32x4_t
+test_vfmlalq_laneq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+{
+ return vfmlalq_laneq_low_u32 (r, a, b, 7);
+}
+
+float32x4_t
+test_vfmlslq_laneq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+{
+ return vfmlslq_laneq_low_u32 (r, a, b, 7);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low_1.c b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low_1.c
new file mode 100644
index 0000000..1b56845
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low_1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.2-a+fp16fml" } */
+
+#include "fp16_fmul_lane_low.h"
+
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[6\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[6\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[7\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[7\\\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low_2.c b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low_2.c
new file mode 100644
index 0000000..d42aef2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low_2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.3-a+fp16fml" } */
+
+#include "fp16_fmul_lane_low.h"
+
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[6\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[6\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[7\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[7\\\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low_3.c b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low_3.c
new file mode 100644
index 0000000..a06c423
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low_3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.4-a+fp16" } */
+
+#include "fp16_fmul_lane_low.h"
+
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[6\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.h\\\[6\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[7\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[7\\\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h
new file mode 100644
index 0000000..778ca1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h
@@ -0,0 +1,25 @@
+#include "arm_neon.h"
+
+float32x2_t
+test_vfmlal_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+{
+ return vfmlal_low_u32 (r, a, b);
+}
+
+float32x4_t
+test_vfmlalq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+{
+ return vfmlalq_low_u32 (r, a, b);
+}
+
+float32x2_t
+test_vfmlsl_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+{
+ return vfmlsl_low_u32 (r, a, b);
+}
+
+float32x4_t
+test_vfmlslq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+{
+ return vfmlslq_low_u32 (r, a, b);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low_1.c b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low_1.c
new file mode 100644
index 0000000..90d611d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low_1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.2-a+fp16fml" } */
+
+#include "fp16_fmul_low.h"
+
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.2h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.2h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low_2.c b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low_2.c
new file mode 100644
index 0000000..b83f4a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low_2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.3-a+fp16fml" } */
+
+#include "fp16_fmul_low.h"
+
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.2h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.2h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low_3.c b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low_3.c
new file mode 100644
index 0000000..6b12d59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low_3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.4-a+fp16" } */
+
+#include "fp16_fmul_low.h"
+
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.2h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlal\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.2s, v\[0-9\]+\.2h, v\[0-9\]+\.2h" 1 } } */
+/* { dg-final { scan-assembler-times "fmlsl\\tv\[0-9\]+\.4s, v\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sha2.h b/gcc/testsuite/gcc.target/aarch64/sha2.h
new file mode 100644
index 0000000..d67c687
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sha2.h
@@ -0,0 +1,25 @@
+#include "arm_neon.h"
+
+uint64x2_t
+test_vsha512hq_u64 (uint64x2_t a, uint64x2_t b, uint64x2_t c)
+{
+ return vsha512hq_u64 (a, b, c);
+}
+
+uint64x2_t
+test_vsha512h2q_u64 (uint64x2_t a, uint64x2_t b, uint64x2_t c)
+{
+ return vsha512h2q_u64 (a, b, c);
+}
+
+uint64x2_t
+test_vsha512su0q_u64 (uint64x2_t a, uint64x2_t b)
+{
+ return vsha512su0q_u64 (a, b);
+}
+
+uint64x2_t
+test_vsha512su1q_u64 (uint64x2_t a, uint64x2_t b, uint64x2_t c)
+{
+ return vsha512su1q_u64 (a, b, c);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sha2_1.c b/gcc/testsuite/gcc.target/aarch64/sha2_1.c
new file mode 100644
index 0000000..55a5d89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sha2_1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.2-a+sha3" } */
+
+#include "sha2.h"
+
+/* { dg-final { scan-assembler-times "sha512h\\tq\[0-9\]+, q\[0-9\]+, v\[0-9\]+\.2d" 1 } } */
+/* { dg-final { scan-assembler-times "sha512h2\\tq\[0-9\]+, q\[0-9\]+, v\[0-9\]+\.2d" 1 } } */
+/* { dg-final { scan-assembler-times "sha512su0\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
+/* { dg-final { scan-assembler-times "sha512su1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sha2_2.c b/gcc/testsuite/gcc.target/aarch64/sha2_2.c
new file mode 100644
index 0000000..5387b92
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sha2_2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.3-a+sha3" } */
+
+#include "sha2.h"
+
+/* { dg-final { scan-assembler-times "sha512h\\tq\[0-9\]+, q\[0-9\]+, v\[0-9\]+\.2d" 1 } } */
+/* { dg-final { scan-assembler-times "sha512h2\\tq\[0-9\]+, q\[0-9\]+, v\[0-9\]+\.2d" 1 } } */
+/* { dg-final { scan-assembler-times "sha512su0\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
+/* { dg-final { scan-assembler-times "sha512su1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sha2_3.c b/gcc/testsuite/gcc.target/aarch64/sha2_3.c
new file mode 100644
index 0000000..52374c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sha2_3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.4-a+sha3" } */
+
+#include "sha2.h"
+
+/* { dg-final { scan-assembler-times "sha512h\\tq\[0-9\]+, q\[0-9\]+, v\[0-9\]+\.2d" 1 } } */
+/* { dg-final { scan-assembler-times "sha512h2\\tq\[0-9\]+, q\[0-9\]+, v\[0-9\]+\.2d" 1 } } */
+/* { dg-final { scan-assembler-times "sha512su0\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
+/* { dg-final { scan-assembler-times "sha512su1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sha3.h b/gcc/testsuite/gcc.target/aarch64/sha3.h
new file mode 100644
index 0000000..76dd193
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sha3.h
@@ -0,0 +1,25 @@
+#include "arm_neon.h"
+
+uint16x8_t
+test_veor3q_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
+{
+ return veor3q_u16 (a, b, c);
+}
+
+uint64x2_t
+test_vrax1q_u64 (uint64x2_t a, uint64x2_t b)
+{
+ return vrax1q_u64 (a, b);
+}
+
+uint64x2_t
+test_vxarq_u64 (uint64x2_t a, uint64x2_t b)
+{
+ return vxarq_u64 (a, b, 15);
+}
+
+uint16x8_t
+test_vbcaxq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
+{
+ return vbcaxq_u16 (a, b, c);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sha3_1.c b/gcc/testsuite/gcc.target/aarch64/sha3_1.c
new file mode 100644
index 0000000..879eadd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sha3_1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.2-a+sha3" } */
+
+#include "sha3.h"
+
+
+/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "rax1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
+/* { dg-final { scan-assembler-times "xar\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d, 15" 1 } } */
+/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sha3_2.c b/gcc/testsuite/gcc.target/aarch64/sha3_2.c
new file mode 100644
index 0000000..2afe28c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sha3_2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.3-a+sha3" } */
+
+#include "sha3.h"
+
+/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "rax1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
+/* { dg-final { scan-assembler-times "xar\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d, 15" 1 } } */
+/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sha3_3.c b/gcc/testsuite/gcc.target/aarch64/sha3_3.c
new file mode 100644
index 0000000..8915c80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sha3_3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.4-a+sha3" } */
+
+#include "sha3.h"
+
+/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "rax1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
+/* { dg-final { scan-assembler-times "xar\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d, 15" 1 } } */
+/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sm3_sm4.c b/gcc/testsuite/gcc.target/aarch64/sm3_sm4.c
new file mode 100644
index 0000000..b8981e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sm3_sm4.c
@@ -0,0 +1,78 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8.4-a+sm4" } */
+
+#include "arm_neon.h"
+
+uint32x4_t
+test_vsm3ss1q_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
+{
+ return vsm3ss1q_u32 (a, b, c);
+}
+
+/* { dg-final { scan-assembler-times "sm3ss1\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
+
+uint32x4_t
+test_vsm3tt1aq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
+{
+ return vsm3tt1aq_u32 (a, b, c, 3);
+}
+
+/* { dg-final { scan-assembler-times "sm3tt1a\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s\\\[3\\\]" 1 } } */
+
+uint32x4_t
+test_vsm3tt1bq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
+{
+ return vsm3tt1bq_u32 (a, b, c, 1);
+}
+
+/* { dg-final { scan-assembler-times "sm3tt1b\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s\\\[1\\\]" 1 } } */
+
+uint32x4_t
+test_vsm3tt2aq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
+{
+ return vsm3tt2aq_u32 (a, b, c, 2);
+}
+
+/* { dg-final { scan-assembler-times "sm3tt2a\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s\\\[2\\\]" 1 } } */
+
+uint32x4_t
+test_vsm3tt2bq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
+{
+ return vsm3tt2bq_u32 (a, b, c, 3);
+}
+
+/* { dg-final { scan-assembler-times "sm3tt2b\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s\\\[3\\\]" 1 } } */
+
+uint32x4_t
+test_vsm3partw1q_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
+{
+ return vsm3partw1q_u32 (a, b, c);
+}
+
+/* { dg-final { scan-assembler-times "sm3partw1\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
+
+uint32x4_t
+test_vsm3partw2q_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
+{
+ return vsm3partw2q_u32 (a, b, c);
+}
+
+/* { dg-final { scan-assembler-times "sm3partw2\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
+
+// SM4
+
+uint32x4_t
+test_vsm4eq_u32 (uint32x4_t a, uint32x4_t b)
+{
+ return vsm4eq_u32 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "sm4e\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
+
+uint32x4_t
+test_vsm4ekeyq_u32 (uint32x4_t a, uint32x4_t b)
+{
+ return vsm4ekeyq_u32 (a, b);
+}
+
+/* { dg-final { scan-assembler-times "sm4ekey\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */