aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJames Greenhalgh <james.greenhalgh@arm.com>2013-07-20 07:56:41 +0000
committerJames Greenhalgh <jgreenhalgh@gcc.gnu.org>2013-07-20 07:56:41 +0000
commitd05d070973b03dedf93c2007335d64ecf4f462ba (patch)
tree58d7089f06a85417607888f88459db109f4cba8e /gcc
parent7a29a1b527b8dd272523b4f70c2df12ec928da81 (diff)
downloadgcc-d05d070973b03dedf93c2007335d64ecf4f462ba.zip
gcc-d05d070973b03dedf93c2007335d64ecf4f462ba.tar.gz
gcc-d05d070973b03dedf93c2007335d64ecf4f462ba.tar.bz2
[AArch64] Rewrite vabs<q>_s<8,16,32,64> AdvSIMD intrinsics to fold to tree
gcc/ * config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): Fold abs in all modes. * config/aarch64/aarch64-simd-builtins.def (abs): Enable for all modes. * config/aarch64/arm_neon.h (vabs<q>_s<8,16,32,64): Rewrite using builtins. (vabs_f64): Add missing intrinsic. gcc/testsuite/ * gcc.target/aarch64/vabs_intrinsic_1.c: New file. From-SVN: r201083
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog10
-rw-r--r--gcc/config/aarch64/aarch64-builtins.c2
-rw-r--r--gcc/config/aarch64/aarch64-simd-builtins.def2
-rw-r--r--gcc/config/aarch64/arm_neon.h125
-rw-r--r--gcc/testsuite/ChangeLog4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c101
6 files changed, 165 insertions, 79 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 822639b0..b457e6c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2013-07-20 James Greenhalgh <james.greenhalgh@arm.com>
+
+ * config/aarch64/aarch64-builtins.c
+ (aarch64_fold_builtin): Fold abs in all modes.
+ * config/aarch64/aarch64-simd-builtins.def
+ (abs): Enable for all modes.
+ * config/aarch64/arm_neon.h
+ (vabs<q>_s<8,16,32,64): Rewrite using builtins.
+ (vabs_f64): Add missing intrinsic.
+
2013-07-19 Ian Bolton <ian.bolton@arm.com>
* config/aarch64/arm_neon.h (vabs_s64): New function
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index f49f06b..6816b9c 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1325,7 +1325,7 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
switch (fcode)
{
- BUILTIN_VDQF (UNOP, abs, 2)
+ BUILTIN_VALLDI (UNOP, abs, 2)
return fold_build1 (ABS_EXPR, type, args[0]);
break;
BUILTIN_VALLDI (BINOP, cmge, 0)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index af2dd6e..55dead6 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -347,7 +347,7 @@
BUILTIN_VDQF (UNOP, frecpe, 0)
BUILTIN_VDQF (BINOP, frecps, 0)
- BUILTIN_VDQF (UNOP, abs, 2)
+ BUILTIN_VALLDI (UNOP, abs, 2)
VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf)
VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 122fd7d..99cf123 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -4468,83 +4468,6 @@ vabds_f32 (float32_t a, float32_t b)
return result;
}
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vabs_s8 (int8x8_t a)
-{
- int8x8_t result;
- __asm__ ("abs %0.8b,%1.8b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-vabs_s16 (int16x4_t a)
-{
- int16x4_t result;
- __asm__ ("abs %0.4h,%1.4h"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-vabs_s32 (int32x2_t a)
-{
- int32x2_t result;
- __asm__ ("abs %0.2s,%1.2s"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vabsq_s8 (int8x16_t a)
-{
- int8x16_t result;
- __asm__ ("abs %0.16b,%1.16b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-vabsq_s16 (int16x8_t a)
-{
- int16x8_t result;
- __asm__ ("abs %0.8h,%1.8h"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-vabsq_s32 (int32x4_t a)
-{
- int32x4_t result;
- __asm__ ("abs %0.4s,%1.4s"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-vabsq_s64 (int64x2_t a)
-{
- int64x2_t result;
- __asm__ ("abs %0.2d,%1.2d"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddlv_s8 (int8x8_t a)
{
@@ -17395,6 +17318,30 @@ vabs_f32 (float32x2_t __a)
return __builtin_aarch64_absv2sf (__a);
}
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vabs_f64 (float64x1_t __a)
+{
+ return __builtin_fabs (__a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vabs_s8 (int8x8_t __a)
+{
+ return __builtin_aarch64_absv8qi (__a);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vabs_s16 (int16x4_t __a)
+{
+ return __builtin_aarch64_absv4hi (__a);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vabs_s32 (int32x2_t __a)
+{
+ return __builtin_aarch64_absv2si (__a);
+}
+
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vabs_s64 (int64x1_t __a)
{
@@ -17413,6 +17360,30 @@ vabsq_f64 (float64x2_t __a)
return __builtin_aarch64_absv2df (__a);
}
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vabsq_s8 (int8x16_t __a)
+{
+ return __builtin_aarch64_absv16qi (__a);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vabsq_s16 (int16x8_t __a)
+{
+ return __builtin_aarch64_absv8hi (__a);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vabsq_s32 (int32x4_t __a)
+{
+ return __builtin_aarch64_absv4si (__a);
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vabsq_s64 (int64x2_t __a)
+{
+ return __builtin_aarch64_absv2di (__a);
+}
+
/* vadd */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3f5a355..ae3b789 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2013-07-20 James Greenhalgh <james.greenhalgh@arm.com>
+
+ * gcc.target/aarch64/vabs_intrinsic_1.c: New file.
+
2013-07-20 Joern Rennecke <joern.rennecke@embecosm.com>
* gcc.dg/pr57154.c: Add dg-require-effective-target scheduling.
diff --git a/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c
new file mode 100644
index 0000000..b34738c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vabs_intrinsic_1.c
@@ -0,0 +1,101 @@
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define ETYPE(size) int##size##_t
+#define VTYPE(size, lanes) int##size##x##lanes##_t
+
+#define TEST_VABS(q, size, lanes) \
+static void \
+test_vabs##q##_##size (ETYPE (size) * res, \
+ const ETYPE (size) *in1) \
+{ \
+ VTYPE (size, lanes) a = vld1##q##_s##size (res); \
+ VTYPE (size, lanes) b = vld1##q##_s##size (in1); \
+ a = vabs##q##_s##size (b); \
+ vst1##q##_s##size (res, a); \
+}
+
+#define BUILD_VARS(width, n_lanes, n_half_lanes) \
+TEST_VABS (, width, n_half_lanes) \
+TEST_VABS (q, width, n_lanes) \
+
+BUILD_VARS (64, 2, 1)
+BUILD_VARS (32, 4, 2)
+BUILD_VARS (16, 8, 4)
+BUILD_VARS (8, 16, 8)
+
+#define POOL1 {-10}
+#define POOL2 {2, -10}
+#define POOL4 {0, -10, 2, -3}
+#define POOL8 {0, -10, 2, -3, 4, -50, 6, -70}
+#define POOL16 {0, -10, 2, -3, 4, -50, 6, -70, \
+ -5, 10, -2, 3, -4, 50, -6, 70}
+
+#define EXPECTED1 {10}
+#define EXPECTED2 {2, 10}
+#define EXPECTED4 {0, 10, 2, 3}
+#define EXPECTED8 {0, 10, 2, 3, 4, 50, 6, 70}
+#define EXPECTED16 {0, 10, 2, 3, 4, 50, 6, 70, \
+ 5, 10, 2, 3, 4, 50, 6, 70}
+
+#define BUILD_TEST(size, lanes_64, lanes_128) \
+static void \
+test_##size (void) \
+{ \
+ int i; \
+ ETYPE (size) pool1[lanes_64] = POOL##lanes_64; \
+ ETYPE (size) res1[lanes_64] = {0}; \
+ ETYPE (size) expected1[lanes_64] = EXPECTED##lanes_64; \
+ ETYPE (size) pool2[lanes_128] = POOL##lanes_128; \
+ ETYPE (size) res2[lanes_128] = {0}; \
+ ETYPE (size) expected2[lanes_128] = EXPECTED##lanes_128; \
+ \
+ /* Forcefully avoid optimization. */ \
+ asm volatile ("" : : : "memory"); \
+ test_vabs_##size (res1, pool1); \
+ for (i = 0; i < lanes_64; i++) \
+ if (res1[i] != expected1[i]) \
+ abort (); \
+ \
+ /* Forcefully avoid optimization. */ \
+ asm volatile ("" : : : "memory"); \
+ test_vabsq_##size (res2, pool2); \
+ for (i = 0; i < lanes_128; i++) \
+ if (res2[i] != expected2[i]) \
+ abort (); \
+}
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 1 } } */
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+BUILD_TEST (8 , 8, 16)
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
+BUILD_TEST (16, 4, 8)
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 1 } } */
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
+BUILD_TEST (32, 2, 4)
+
+/* { dg-final { scan-assembler-times "abs\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
+BUILD_TEST (64, 1, 2)
+
+#undef BUILD_TEST
+
+#define BUILD_TEST(size) test_##size ()
+
+int
+main (int argc, char **argv)
+{
+ BUILD_TEST (8);
+ BUILD_TEST (16);
+ BUILD_TEST (32);
+ BUILD_TEST (64);
+ return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */