author    Bilyan Borisov <bilyan.borisov@arm.com>       2015-11-03 10:58:27 +0000
committer James Greenhalgh <jgreenhalgh@gcc.gnu.org>    2015-11-03 10:58:27 +0000
commit    496ea87db62abbf97da6d5791b4d5fb406350767 (patch)
tree      e3ed7379a3febcc2d0f88561c8224375ac46b4e1 /gcc/config/aarch64/arm_neon.h
parent    04f725b90712254596630764cb2d5c69585d7337 (diff)
[AARCH64][PATCH 1/3] Implementing the variants of the vmulx_ NEON intrinsic
gcc/

	* config/aarch64/aarch64-simd-builtins.def (fmulx): New.
	* config/aarch64/aarch64-simd.md (aarch64_fmulx<mode>): New.
	* config/aarch64/arm_neon.h (vmulx_f32): Rewrite to call fmulx
	builtin.
	(vmulxq_f32): Likewise.
	(vmulx_f64): New.
	(vmulxq_f64): Rewrite to call fmulx builtin.
	(vmulxs_f32): Likewise.
	(vmulxd_f64): Likewise.
	(vmulx_lane_f32): Remove.
	* config/aarch64/iterators.md (UNSPEC): Add fmulx.

gcc/testsuite/

	* gcc/testsuite/gcc.target/aarch64/simd/vmulx_f32_1.c: New.
	* gcc/testsuite/gcc.target/aarch64/simd/vmulx_f64_1.c: New.
	* gcc/testsuite/gcc.target/aarch64/simd/vmulxq_f32_1.c: New.
	* gcc/testsuite/gcc.target/aarch64/simd/vmulxq_f64_1.c: New.
	* gcc/testsuite/gcc.target/aarch64/simd/vmulxs_f32_1.c: New.
	* gcc/testsuite/gcc.target/aarch64/simd/vmulxd_f64_1.c: New.

From-SVN: r229702
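[Editor's note, not part of the commit: a minimal usage sketch of the vmulx_f32 intrinsic that this patch moves from hand-written inline asm to the new fmulx builtin. FMULX behaves like an ordinary floating-point multiply, except that (±0) × (±Inf) returns ±2.0 instead of NaN; the example below makes that visible. It assumes a GCC toolchain targeting AArch64; the file and function names are illustrative only.]

/* mulx_demo.c -- illustrative only; build with e.g.
   aarch64-linux-gnu-gcc -O2 mulx_demo.c  */
#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  float32x2_t zero = vdup_n_f32 (0.0f);
  float32x2_t inf  = vdup_n_f32 (__builtin_inff ());

  /* An ordinary multiply gives NaN for 0 * Inf ...  */
  float32x2_t m  = vmul_f32 (zero, inf);
  /* ... but FMULX is defined to give 2.0 with the product's sign.  */
  float32x2_t mx = vmulx_f32 (zero, inf);

  printf ("vmul:  %f\nvmulx: %f\n",
	  (double) vget_lane_f32 (m, 0),
	  (double) vget_lane_f32 (mx, 0));
  return 0;
}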
Diffstat (limited to 'gcc/config/aarch64/arm_neon.h')
-rw-r--r--	gcc/config/aarch64/arm_neon.h	105
1 file changed, 37 insertions(+), 68 deletions(-)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index e186348..265c266 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -8509,63 +8509,6 @@ vmulq_n_u32 (uint32x4_t a, uint32_t b)
return result;
}
-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-vmulx_f32 (float32x2_t a, float32x2_t b)
-{
- float32x2_t result;
- __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-#define vmulx_lane_f32(a, b, c) \
- __extension__ \
- ({ \
- float32x4_t b_ = (b); \
- float32x2_t a_ = (a); \
- float32x2_t result; \
- __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vmulxd_f64 (float64_t a, float64_t b)
-{
- float64_t result;
- __asm__ ("fmulx %d0, %d1, %d2"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-vmulxq_f32 (float32x4_t a, float32x4_t b)
-{
- float32x4_t result;
- __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
-vmulxq_f64 (float64x2_t a, float64x2_t b)
-{
- float64x2_t result;
- __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
#define vmulxq_lane_f32(a, b, c) \
__extension__ \
({ \
@@ -8592,17 +8535,6 @@ vmulxq_f64 (float64x2_t a, float64x2_t b)
result; \
})
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
-vmulxs_f32 (float32_t a, float32_t b)
-{
- float32_t result;
- __asm__ ("fmulx %s0, %s1, %s2"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmvn_p8 (poly8x8_t a)
{
@@ -17739,6 +17671,43 @@ vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
(int32x4_t) __b);
}
+/* vmulx */
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmulx_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return __builtin_aarch64_fmulxv2sf (__a, __b);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulxq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return __builtin_aarch64_fmulxv4sf (__a, __b);
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vmulx_f64 (float64x1_t __a, float64x1_t __b)
+{
+ return (float64x1_t) {__builtin_aarch64_fmulxdf (__a[0], __b[0])};
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vmulxq_f64 (float64x2_t __a, float64x2_t __b)
+{
+ return __builtin_aarch64_fmulxv2df (__a, __b);
+}
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vmulxs_f32 (float32_t __a, float32_t __b)
+{
+ return __builtin_aarch64_fmulxsf (__a, __b);
+}
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vmulxd_f64 (float64_t __a, float64_t __b)
+{
+ return __builtin_aarch64_fmulxdf (__a, __b);
+}
/* vpmax */
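[Editor's note, not part of the commit: with the intrinsics now expanding through the fmulx builtins, a scalar call such as the sketch below should compile, at -O2, to a single fmulx instruction instead of the removed inline asm. This is written in the spirit of the new vmulxd_f64_1.c test, not its actual contents; the function name is hypothetical.]

#include <arm_neon.h>

/* Expected codegen (assumption): fmulx d0, d0, d1; ret  */
float64_t
scalar_mulx (float64_t a, float64_t b)
{
  return vmulxd_f64 (a, b);
}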