aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andrea Corallo <andrea.corallo@arm.com> 2020-10-21 11:16:01 +0200
committer Andrea Corallo <andrea.corallo@arm.com> 2020-11-03 14:19:52 +0100
commit c9a0276840360dd21c91da1a883207833880f124 (patch)
tree 99114d8e7504af72f5d28b66650523929b69ed30
parent 444655b6f02605ae936426a14ba527795795587b (diff)
download gcc-c9a0276840360dd21c91da1a883207833880f124.zip
gcc-c9a0276840360dd21c91da1a883207833880f124.tar.gz
gcc-c9a0276840360dd21c91da1a883207833880f124.tar.bz2
arm: Add vld1_lane_bf16 + vld1q_lane_bf16 intrinsics
gcc/ChangeLog

2020-10-21  Andrea Corallo  <andrea.corallo@arm.com>

	* config/arm/arm_neon_builtins.def: Add to LOAD1LANE v4bf, v8bf.
	* config/arm/arm_neon.h (vld1_lane_bf16, vld1q_lane_bf16): Add
	intrinsics.

gcc/testsuite/ChangeLog

2020-10-21  Andrea Corallo  <andrea.corallo@arm.com>

	* gcc.target/arm/simd/vld1_lane_bf16_1.c: New testcase.
	* gcc.target/arm/simd/vld1_lane_bf16_indices_1.c: Likewise.
	* gcc.target/arm/simd/vld1q_lane_bf16_indices_1.c: Likewise.
-rw-r--r-- gcc/config/arm/arm_neon.h 14
-rw-r--r-- gcc/config/arm/arm_neon_builtins.def 4
-rw-r--r-- gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_1.c 21
-rw-r--r-- gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_indices_1.c 17
-rw-r--r-- gcc/testsuite/gcc.target/arm/simd/vld1q_lane_bf16_indices_1.c 17
5 files changed, 71 insertions, 2 deletions
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index aa21730..fcd8020 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -19665,6 +19665,20 @@ vld4q_dup_bf16 (const bfloat16_t * __ptr)
return __rv.__i;
}
+__extension__ extern __inline bfloat16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1_lane_bf16 (const bfloat16_t * __a, bfloat16x4_t __b, const int __c)
+{
+ return __builtin_neon_vld1_lanev4bf (__a, __b, __c);
+}
+
+__extension__ extern __inline bfloat16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vld1q_lane_bf16 (const bfloat16_t * __a, bfloat16x8_t __b, const int __c)
+{
+ return __builtin_neon_vld1_lanev8bf (__a, __b, __c);
+}
+
#pragma GCC pop_options
#ifdef __cplusplus
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index 34c1945..d0617a4 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -312,8 +312,8 @@ VAR1 (TERNOP, vtbx3, v8qi)
VAR1 (TERNOP, vtbx4, v8qi)
VAR12 (LOAD1, vld1,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
-VAR10 (LOAD1LANE, vld1_lane,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+VAR12 (LOAD1LANE, vld1_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf)
VAR10 (LOAD1, vld1_dup,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR12 (STORE1, vst1,
diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_1.c
new file mode 100644
index 0000000..fa4e45b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_1.c
@@ -0,0 +1,21 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-O3 --save-temps" } */
+
+#include "arm_neon.h"
+
+bfloat16x4_t
+test_vld1_lane_bf16 (bfloat16_t *a, bfloat16x4_t b)
+{
+ return vld1_lane_bf16 (a, b, 1);
+}
+
+bfloat16x8_t
+test_vld1q_lane_bf16 (bfloat16_t *a, bfloat16x8_t b)
+{
+ return vld1q_lane_bf16 (a, b, 2);
+}
+
+/* { dg-final { scan-assembler "vld1.16\t{d0\\\[1\\\]}, \\\[r0\\\]" } } */
+/* { dg-final { scan-assembler "vld1.16\t{d0\\\[2\\\]}, \\\[r0\\\]" } } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_indices_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_indices_1.c
new file mode 100644
index 0000000..c83eb53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vld1_lane_bf16_indices_1.c
@@ -0,0 +1,17 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include "arm_neon.h"
+
+bfloat16x4_t
+test_vld1_lane_bf16 (bfloat16_t *a, bfloat16x4_t b)
+{
+ bfloat16x4_t res;
+ res = vld1_lane_bf16 (a, b, -1);
+ res = vld1_lane_bf16 (a, b, 4);
+ return res;
+}
+
+/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_lane_bf16_indices_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_lane_bf16_indices_1.c
new file mode 100644
index 0000000..8e21e61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_lane_bf16_indices_1.c
@@ -0,0 +1,17 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include "arm_neon.h"
+
+bfloat16x8_t
+test_vld1q_lane_bf16 (bfloat16_t *a, bfloat16x8_t b)
+{
+ bfloat16x8_t res;
+ res = vld1q_lane_bf16 (a, b, -1);
+ res = vld1q_lane_bf16 (a, b, 8);
+ return res;
+}
+
+/* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
+/* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */