diff options
author | Dennis Zhang <dennis.zhang@arm.com> | 2020-02-07 15:04:23 +0000 |
---|---|---|
committer | Dennis Zhang <dennis.zhang@arm.com> | 2020-02-07 15:04:23 +0000 |
commit | 40f648378061c170cf6a9ab680af01b3a3a83569 (patch) | |
tree | 7c67d016392906eb12ab5507e948ca8b10c074b1 /gcc | |
parent | b7903d9f5beb5db440e56fa057d32c6f13f7c5ec (diff) | |
download | gcc-40f648378061c170cf6a9ab680af01b3a3a83569.zip gcc-40f648378061c170cf6a9ab680af01b3a3a83569.tar.gz gcc-40f648378061c170cf6a9ab680af01b3a3a83569.tar.bz2 |
aarch64: ACLE I8MM multiply-accumulate intrinsics
This patch adds intrinsics for 8-bit integer matrix multiply-accumulate
operations including vmmlaq_s32, vmmlaq_u32, and vusmmlaq_s32.
gcc/ChangeLog:
2020-02-07 Dennis Zhang <dennis.zhang@arm.com>
* config/aarch64/aarch64-simd-builtins.def (simd_smmla): New entry.
(simd_ummla, simd_usmmla): Likewise.
* config/aarch64/aarch64-simd.md (aarch64_simd_<sur>mmlav16qi): New.
* config/aarch64/arm_neon.h (vmmlaq_s32, vmmlaq_u32): New.
(vusmmlaq_s32): New.
gcc/testsuite/ChangeLog:
2020-02-07 Dennis Zhang <dennis.zhang@arm.com>
* gcc.target/aarch64/simd/vmmla.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 5 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 12 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 23 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/vmmla.c | 27 |
6 files changed, 79 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d5d29f5..d255bd9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2020-02-07 Dennis Zhang <dennis.zhang@arm.com> + + * config/aarch64/aarch64-simd-builtins.def (simd_smmla): New entry. + (simd_ummla, simd_usmmla): Likewise. + * config/aarch64/aarch64-simd.md (aarch64_simd_<sur>mmlav16qi): New. + * config/aarch64/arm_neon.h (vmmlaq_s32, vmmlaq_u32): New. + (vusmmlaq_s32): New. + 2020-02-07 Richard Biener <rguenther@suse.de> PR middle-end/93519 diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 02b2154..fe3c7f1 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -703,3 +703,8 @@ VAR1 (QUADOP_LANE, bfmlalt_lane, 0, v4sf) VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, v4sf) VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, v4sf) + + /* Implemented by aarch64_simd_<sur>mmlav16qi. */ + VAR1 (TERNOP, simd_smmla, 0, v16qi) + VAR1 (TERNOPU, simd_ummla, 0, v16qi) + VAR1 (TERNOP_SSUS, simd_usmmla, 0, v16qi) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index f2b440c..c8e1012 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -7173,3 +7173,15 @@ } [(set_attr "type" "neon_fp_mla_s_scalar_q")] ) + +;; 8-bit integer matrix multiply-accumulate +(define_insn "aarch64_simd_<sur>mmlav16qi" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (plus:V4SI + (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w") + (match_operand:V16QI 3 "register_operand" "w")] MATMUL) + (match_operand:V4SI 1 "register_operand" "0")))] + "TARGET_I8MM" + "<sur>mmla\\t%0.4s, %2.16b, %3.16b" + [(set_attr "type" "neon_mla_s_q")] +) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index db845a3..a6bcdf1 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -34797,6 +34797,29 @@ vsudotq_laneq_s32 (int32x4_t __r, int8x16_t __a, uint8x16_t __b, return __builtin_aarch64_sudot_laneqv16qi_sssus (__r, __a, __b, __index); } +/* Matrix Multiply-Accumulate. */ + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmmlaq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b) +{ + return __builtin_aarch64_simd_smmlav16qi (__r, __a, __b); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vmmlaq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b) +{ + return __builtin_aarch64_simd_ummlav16qi_uuuu (__r, __a, __b); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vusmmlaq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b) +{ + return __builtin_aarch64_simd_usmmlav16qi_ssus (__r, __a, __b); +} + #pragma GCC pop_options #undef __aarch64_vget_lane_any diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 69f7223..7ec36cc 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2020-02-07 Dennis Zhang <dennis.zhang@arm.com> + + * gcc.target/aarch64/simd/vmmla.c: New test. + 2020-02-07 Richard Biener <rguenther@suse.de> PR middle-end/93519 diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c b/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c new file mode 100644 index 0000000..5eec2b5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c @@ -0,0 +1,27 @@ +/* { dg-do assemble} */ +/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ +/* { dg-additional-options "-march=armv8.2-a+i8mm" } */ + +#include "arm_neon.h" + +int32x4_t +test_vmmlaq_s32 (int32x4_t r, int8x16_t a, int8x16_t b) +{ + return vmmlaq_s32 (r, a, b); +} + +uint32x4_t +test_vmmlaq_u32 (uint32x4_t r, uint8x16_t a, uint8x16_t b) +{ + return vmmlaq_u32 (r, a, b); +} + +int32x4_t +test_vusmmlaq_s32 (int32x4_t r, uint8x16_t a, int8x16_t b) +{ + return vusmmlaq_s32 (r, a, b); +} + +/* { dg-final { scan-assembler-times {\tsmmla\tv[0-9]+.4s, v[0-9]+.16b, v[0-9]+.16b} 1 } } */ +/* { dg-final { scan-assembler-times {\tummla\tv[0-9]+.4s, v[0-9]+.16b, v[0-9]+.16b} 1 } } */ +/* { dg-final { scan-assembler-times {\tusmmla\tv[0-9]+.4s, v[0-9]+.16b, v[0-9]+.16b} 1 } } */ |