diff options
Diffstat (limited to 'gcc/config/arm/arm_neon.h')
-rw-r--r-- | gcc/config/arm/arm_neon.h | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 5cec7dd..af1f747 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -11393,6 +11393,38 @@ vst1q_s64_x3 (int64_t * __a, int64x2x3_t __b) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_s8_x4 (int8_t * __a, int8x16x4_t __b) +{ + union { int8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_s16_x4 (int16_t * __a, int16x8x4_t __b) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_s32_x4 (int32_t * __a, int32x4x4_t __b) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_s64_x4 (int64_t * __a, int64x2x4_t __b) +{ + union { int64x2x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v2di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_s8_x3 (int8_t * __a, int8x8x3_t __b) { union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; @@ -11736,6 +11768,14 @@ vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t __b) __builtin_neon_vst1q_x3v2di ((__builtin_neon_di *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_p64_x4 (poly64_t * __a, poly64x2x4_t __b) +{ + union { poly64x2x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v2di ((__builtin_neon_di *) __a, __bu.__o); +} + #pragma GCC pop_options __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11817,6 +11857,24 @@ vst1q_f32_x3 (float32_t * __a, float32x4x3_t __b) __builtin_neon_vst1q_x3v4sf (__a, __bu.__o); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_f16_x4 (float16_t * __a, float16x8x4_t __b) +{ + union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v8hf (__a, __bu.__o); +} +#endif + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_f32_x4 (float32_t * __a, float32x4x4_t __b) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v4sf (__a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u8 (uint8_t * __a, uint8x16_t __b) @@ -11911,6 +11969,38 @@ vst1q_u64_x3 (uint64_t * __a, uint64x2x3_t __b) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u8_x4 (uint8_t * __a, uint8x16x4_t __b) +{ + union { uint8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u16_x4 (uint16_t * __a, uint16x8x4_t __b) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u32_x4 (uint32_t * __a, uint32x4x4_t __b) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u64_x4 (uint64_t * __a, uint64x2x4_t __b) +{ + union { uint64x2x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v2di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p8 (poly8_t * __a, poly8x16_t __b) { __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b); @@ -11957,6 +12047,22 @@ vst1q_p16_x3 (poly16_t * __a, poly16x8x3_t __b) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_p8_x4 (poly8_t * __a, poly8x16x4_t __b) +{ + union { poly8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_p16_x4 (poly16_t * __a, poly16x8x4_t __b) +{ + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c) { __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, __b, __c); @@ -20648,6 +20754,14 @@ vst1q_bf16_x3 (bfloat16_t * __a, bfloat16x8x3_t __b) } __extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_bf16_x4 (bfloat16_t * __a, bfloat16x8x4_t __b) +{ + union { bfloat16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v8bf (__a, __bu.__o); +} + +__extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_bf16 (bfloat16_t * __ptr, bfloat16x4x2_t __val) { |