diff options
author | Delia Burduv <delia.burduv@arm.com> | 2020-02-06 09:45:52 +0000 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@arm.com> | 2020-02-06 16:40:12 +0000 |
commit | f78335df69993a900512f92324cab6a20b1bde0c (patch) | |
tree | 33e340eb3dd645f4c181157b35d3c731f71f11e5 /gcc | |
parent | ad84548336ad9a08b451ddd7ea64f07aee0576a6 (diff) | |
download | gcc-f78335df69993a900512f92324cab6a20b1bde0c.zip gcc-f78335df69993a900512f92324cab6a20b1bde0c.tar.gz gcc-f78335df69993a900512f92324cab6a20b1bde0c.tar.bz2 |
aarch64: ACLE intrinsics bfmmla and bfmlal<b/t>
This patch adds the ARMv8.6 ACLE intrinsics for bfmmla, bfmlalb and
bfmlalt as part of the BFloat16 extension.
(https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics)
The intrinsics are declared in arm_neon.h and the RTL patterns are
defined in aarch64-simd.md. Two new tests are added to check assembler
output.
2020-02-06 Delia Burduv <delia.burduv@arm.com>
gcc/
* config/aarch64/aarch64-simd-builtins.def
(bfmmlaq): New built-in function.
(bfmlalb): New built-in function.
(bfmlalt): New built-in function.
(bfmlalb_lane): New built-in function.
(bfmlalt_lane): New built-in function.
* config/aarch64/aarch64-simd.md
(aarch64_bfmmlaqv4sf): New pattern.
(aarch64_bfmlal<bt>v4sf): New pattern.
(aarch64_bfmlal<bt>_lane<q>v4sf): New pattern.
* config/aarch64/arm_neon.h (vbfmmlaq_f32): New intrinsic.
(vbfmlalbq_f32): New intrinsic.
(vbfmlaltq_f32): New intrinsic.
(vbfmlalbq_lane_f32): New intrinsic.
(vbfmlaltq_lane_f32): New intrinsic.
(vbfmlalbq_laneq_f32): New intrinsic.
(vbfmlaltq_laneq_f32): New intrinsic.
* config/aarch64/iterators.md (BF_MLA): New int iterator.
(bt): New int attribute.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 22 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 11 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 39 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 54 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmlalbt-compile.c | 67 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmmla-compile.c | 18 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbfmlalbt_lane_f32_indices_1.c | 46 |
8 files changed, 262 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c842c4f..1fe29d3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2020-02-06 Delia Burduv <delia.burduv@arm.com> + + * config/aarch64/aarch64-simd-builtins.def + (bfmlaq): New built-in function. + (bfmlalb): New built-in function. + (bfmlalt): New built-in function. + (bfmlalb_lane): New built-in function. + (bfmlalt_lane): New built-in function. + * config/aarch64/aarch64-simd.md + (aarch64_bfmmlaqv4sf): New pattern. + (aarch64_bfmlal<bt>v4sf): New pattern. + (aarch64_bfmlal<bt>_lane<q>v4sf): New pattern. + * config/aarch64/arm_neon.h (vbfmmlaq_f32): New intrinsic. + (vbfmlalbq_f32): New intrinsic. + (vbfmlaltq_f32): New intrinsic. + (vbfmlalbq_lane_f32): New intrinsic. + (vbfmlaltq_lane_f32): New intrinsic. + (vbfmlalbq_laneq_f32): New intrinsic. + (vbfmlaltq_laneq_f32): New intrinsic. + * config/aarch64/iterators.md (BF_MLA): New int iterator. + (bt): New int attribute. + 2020-02-06 Uroš Bizjak <ubizjak@gmail.com> * config/i386/i386.md (*pushtf): Emit "#" instead of diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index a118f4f..02b2154 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -692,3 +692,14 @@ VAR2 (TERNOP, bfdot, 0, v2sf, v4sf) VAR2 (QUADOP_LANE_PAIR, bfdot_lane, 0, v2sf, v4sf) VAR2 (QUADOP_LANE_PAIR, bfdot_laneq, 0, v2sf, v4sf) + + /* Implemented by aarch64_bfmmlaqv4sf */ + VAR1 (TERNOP, bfmmlaq, 0, v4sf) + + /* Implemented by aarch64_bfmlal<bt>{_lane{q}}v4sf */ + VAR1 (TERNOP, bfmlalb, 0, v4sf) + VAR1 (TERNOP, bfmlalt, 0, v4sf) + VAR1 (QUADOP_LANE, bfmlalb_lane, 0, v4sf) + VAR1 (QUADOP_LANE, bfmlalt_lane, 0, v4sf) + VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, v4sf) + VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, v4sf) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 5a58051..f2b440c 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ 
b/gcc/config/aarch64/aarch64-simd.md @@ -7134,3 +7134,42 @@ } [(set_attr "type" "neon_dot<VDQSF:q>")] ) + +;; bfmmla +(define_insn "aarch64_bfmmlaqv4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") + (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") + (match_operand:V8BF 3 "register_operand" "w")] + UNSPEC_BFMMLA)))] + "TARGET_BF16_SIMD" + "bfmmla\\t%0.4s, %2.8h, %3.8h" + [(set_attr "type" "neon_fp_mla_s_q")] +) + +;; bfmlal<bt> +(define_insn "aarch64_bfmlal<bt>v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (plus: V4SF (match_operand:V4SF 1 "register_operand" "0") + (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") + (match_operand:V8BF 3 "register_operand" "w")] + BF_MLA)))] + "TARGET_BF16_SIMD" + "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h" + [(set_attr "type" "neon_fp_mla_s_q")] +) + +(define_insn "aarch64_bfmlal<bt>_lane<q>v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (plus: V4SF (match_operand:V4SF 1 "register_operand" "0") + (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") + (match_operand:VBF 3 "register_operand" "w") + (match_operand:SI 4 "const_int_operand" "n")] + BF_MLA)))] + "TARGET_BF16_SIMD" +{ + operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4])); + return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]"; +} + [(set_attr "type" "neon_fp_mla_s_scalar_q")] +) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 7f05c3f..db845a3 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -34660,6 +34660,60 @@ vbfdotq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, return __builtin_aarch64_bfdot_laneqv4sf (__r, __a, __b, __index); } +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vbfmmlaq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b) + +{ + return __builtin_aarch64_bfmmlaqv4sf (__r, 
__a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vbfmlalbq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b) +{ + return __builtin_aarch64_bfmlalbv4sf (__r, __a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vbfmlaltq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b) +{ + return __builtin_aarch64_bfmlaltv4sf (__r, __a, __b); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vbfmlalbq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b, + const int __index) +{ + return __builtin_aarch64_bfmlalb_lanev4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vbfmlaltq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b, + const int __index) +{ + return __builtin_aarch64_bfmlalt_lanev4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vbfmlalbq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, + const int __index) +{ + return __builtin_aarch64_bfmlalb_lane_qv4sf (__r, __a, __b, __index); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, + const int __index) +{ + return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index); +} + #pragma GCC pop_options /* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. 
*/ diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 3e3fd9d..7c62f16 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -2620,6 +2620,9 @@ (define_int_iterator FMMLA [UNSPEC_FMMLA]) +(define_int_iterator BF_MLA [UNSPEC_BFMLALB + UNSPEC_BFMLALT]) + ;; Iterators for atomic operations. (define_int_iterator ATOMIC_LDOP @@ -2871,6 +2874,8 @@ (define_int_attr ab [(UNSPEC_CLASTA "a") (UNSPEC_CLASTB "b") (UNSPEC_LASTA "a") (UNSPEC_LASTB "b")]) +(define_int_attr bt [(UNSPEC_BFMLALB "b") (UNSPEC_BFMLALT "t")]) + (define_int_attr addsub [(UNSPEC_SHADD "add") (UNSPEC_UHADD "add") (UNSPEC_SRHADD "add") diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmlalbt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmlalbt-compile.c new file mode 100644 index 0000000..9810e4b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmlalbt-compile.c @@ -0,0 +1,67 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include <arm_neon.h> + +/* +**test_bfmlalb: +** bfmlalb v0.4s, v1.8h, v2.8h +** ret +*/ +float32x4_t test_bfmlalb (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) +{ + return vbfmlalbq_f32 (r, a, b); +} + +/* +**test_bfmlalt: +** bfmlalt v0.4s, v1.8h, v2.8h +** ret +*/ +float32x4_t test_bfmlalt (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) +{ + return vbfmlaltq_f32 (r, a, b); +} + +/* +**test_bfmlalb_lane: +** bfmlalb v0.4s, v1.8h, v2.h[0] +** ret +*/ +float32x4_t test_bfmlalb_lane (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) +{ + return vbfmlalbq_lane_f32 (r, a, b, 0); +} + +/* +**test_bfmlalt_lane: +** bfmlalt v0.4s, v1.8h, v2.h[2] +** ret +*/ +float32x4_t test_bfmlalt_lane (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) +{ + 
return vbfmlaltq_lane_f32 (r, a, b, 2); +} + +/* +**test_bfmlalb_laneq: +** bfmlalb v0.4s, v1.8h, v2.h[4] +** ret +*/ +float32x4_t test_bfmlalb_laneq (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) +{ + return vbfmlalbq_laneq_f32 (r, a, b, 4); +} + +/* +**test_bfmlalt_laneq: +** bfmlalt v0.4s, v1.8h, v2.h[7] +** ret +*/ +float32x4_t test_bfmlalt_laneq (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) +{ + return vbfmlaltq_laneq_f32 (r, a, b, 7); +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmmla-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmmla-compile.c new file mode 100644 index 0000000..0aaa69f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmmla-compile.c @@ -0,0 +1,18 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include <arm_neon.h> + + +/* +**test_bfmmla: +** bfmmla v0.4s, v1.8h, v2.8h +** ret +*/ +float32x4_t test_bfmmla (float32x4_t r, bfloat16x8_t x, bfloat16x8_t y) +{ + return vbfmmlaq_f32 (r, x, y); +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbfmlalbt_lane_f32_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbfmlalbt_lane_f32_indices_1.c new file mode 100644 index 0000000..4d50ba3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbfmlalbt_lane_f32_indices_1.c @@ -0,0 +1,46 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ + +#include <arm_neon.h> + +void +f_vbfmlaltq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) +{ + /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */ + 
vbfmlaltq_lane_f32 (r, a, b, -1); + /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */ + vbfmlaltq_lane_f32 (r, a, b, 4); + return; +} + +void +f_vbfmlaltq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) +{ + /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */ + vbfmlaltq_laneq_f32 (r, a, b, -1); + /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */ + vbfmlaltq_laneq_f32 (r, a, b, 8); + return; +} + +void +f_vbfmlalbq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) +{ + /* { dg-error "lane -2 out of range 0 - 3" "" { target *-*-* } 0 } */ + vbfmlalbq_lane_f32 (r, a, b, -2); + /* { dg-error "lane 5 out of range 0 - 3" "" { target *-*-* } 0 } */ + vbfmlalbq_lane_f32 (r, a, b, 5); + return; +} + +void +f_vbfmlalbq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) +{ + /* { dg-error "lane -2 out of range 0 - 7" "" { target *-*-* } 0 } */ + vbfmlalbq_laneq_f32 (r, a, b, -2); + /* { dg-error "lane 9 out of range 0 - 7" "" { target *-*-* } 0 } */ + vbfmlalbq_laneq_f32 (r, a, b, 9); + return; +} |