diff options
author | Felix Yang <felix.yang@huawei.com> | 2014-12-07 15:01:23 +0000 |
---|---|---|
committer | Fei Yang <fyang@gcc.gnu.org> | 2014-12-07 15:01:23 +0000 |
commit | a5e69cad62b615cf6ec79fb9222b2dfcad0bbd31 (patch) | |
tree | 9e8418a1dc139b57bb2a19910f28b293251df645 | |
parent | 07bdf21b53445f88ec4c5a21fbb5f0e2b0b31e9f (diff) | |
download | gcc-a5e69cad62b615cf6ec79fb9222b2dfcad0bbd31.zip gcc-a5e69cad62b615cf6ec79fb9222b2dfcad0bbd31.tar.gz gcc-a5e69cad62b615cf6ec79fb9222b2dfcad0bbd31.tar.bz2 |
aarch64-simd.md (clrsb<mode>2, [...]): New patterns.
* config/aarch64/aarch64-simd.md (clrsb<mode>2, popcount<mode>2): New
patterns.
* config/aarch64/aarch64-simd-builtins.def (clrsb, popcount): New
builtins.
* config/aarch64/arm_neon.h (vcls_s8, vcls_s16, vcls_s32, vclsq_s8,
vclsq_s16, vclsq_s32, vcnt_p8, vcnt_s8, vcnt_u8, vcntq_p8, vcntq_s8,
vcntq_u8): Rewrite using builtin functions.
Co-Authored-By: Shanyao Chen <chenshanyao@huawei.com>
From-SVN: r218464
-rw-r--r-- | gcc/ChangeLog | 11 |
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 2 |
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 16 |
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 208 |
4 files changed, 105 insertions, 132 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e233e65..41e6f3e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2014-12-07 Felix Yang <felix.yang@huawei.com> + Shanyao Chen <chenshanyao@huawei.com> + + * config/aarch64/aarch64-simd.md (clrsb<mode>2, popcount<mode>2): New + patterns. + * config/aarch64/aarch64-simd-builtins.def (clrsb, popcount): New + builtins. + * config/aarch64/arm_neon.h (vcls_s8, vcls_s16, vcls_s32, vclsq_s8, + vclsq_s16, vclsq_s32, vcnt_p8, vcnt_s8, vcnt_u8, vcntq_p8, vcntq_s8, + vcntq_u8): Rewrite using builtin functions. + 2014-12-07 Jan Hubicka <hubicka@ucw.cz> * symtab.c (symtab_node::equal_address_to): New function. diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 503fa2c..953eb53 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -44,8 +44,10 @@ BUILTIN_VDQF (UNOP, sqrt, 2) BUILTIN_VD_BHSI (BINOP, addp, 0) VAR1 (UNOP, addp, 0, di) + BUILTIN_VDQ_BHSI (UNOP, clrsb, 2) BUILTIN_VDQ_BHSI (UNOP, clz, 2) BUILTIN_VS (UNOP, ctz, 2) + BUILTIN_VB (UNOP, popcount, 2) /* be_checked_get_lane does its own lane swapping, so not a lane index. 
*/ BUILTIN_VALL (GETREG, be_checked_get_lane, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 4995e4d..d44d774 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1903,6 +1903,14 @@ DONE; }) +(define_insn "clrsb<mode>2" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] + "TARGET_SIMD" + "cls\\t%0.<Vtype>, %1.<Vtype>" + [(set_attr "type" "neon_cls<q>")] +) + (define_insn "clz<mode>2" [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] @@ -1911,6 +1919,14 @@ [(set_attr "type" "neon_cls<q>")] ) +(define_insn "popcount<mode>2" + [(set (match_operand:VB 0 "register_operand" "=w") + (popcount:VB (match_operand:VB 1 "register_operand" "w")))] + "TARGET_SIMD" + "cnt\\t%0.<Vbtype>, %1.<Vbtype>" + [(set_attr "type" "neon_cnt<q>")] +) + ;; 'across lanes' max and min ops. 
;; Template for outputting a scalar, so we can create __builtins which can be diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 8cff719..f3a8731 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -5317,138 +5317,6 @@ vaddlvq_u32 (uint32x4_t a) return result; } -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vcls_s8 (int8x8_t a) -{ - int8x8_t result; - __asm__ ("cls %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vcls_s16 (int16x4_t a) -{ - int16x4_t result; - __asm__ ("cls %0.4h,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vcls_s32 (int32x2_t a) -{ - int32x2_t result; - __asm__ ("cls %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vclsq_s8 (int8x16_t a) -{ - int8x16_t result; - __asm__ ("cls %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vclsq_s16 (int16x8_t a) -{ - int16x8_t result; - __asm__ ("cls %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vclsq_s32 (int32x4_t a) -{ - int32x4_t result; - __asm__ ("cls %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vcnt_p8 (poly8x8_t a) -{ - poly8x8_t result; - __asm__ ("cnt %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vcnt_s8 
(int8x8_t a) -{ - int8x8_t result; - __asm__ ("cnt %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vcnt_u8 (uint8x8_t a) -{ - uint8x8_t result; - __asm__ ("cnt %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vcntq_p8 (poly8x16_t a) -{ - poly8x16_t result; - __asm__ ("cnt %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vcntq_s8 (int8x16_t a) -{ - int8x16_t result; - __asm__ ("cnt %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vcntq_u8 (uint8x16_t a) -{ - uint8x16_t result; - __asm__ ("cnt %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - #define vcopyq_lane_f32(a, b, c, d) \ __extension__ \ ({ \ @@ -14082,6 +13950,44 @@ vcltzd_f64 (float64_t __a) return __a < 0.0 ? -1ll : 0ll; } +/* vcls. 
*/ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcls_s8 (int8x8_t __a) +{ + return __builtin_aarch64_clrsbv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcls_s16 (int16x4_t __a) +{ + return __builtin_aarch64_clrsbv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcls_s32 (int32x2_t __a) +{ + return __builtin_aarch64_clrsbv2si (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vclsq_s8 (int8x16_t __a) +{ + return __builtin_aarch64_clrsbv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vclsq_s16 (int16x8_t __a) +{ + return __builtin_aarch64_clrsbv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vclsq_s32 (int32x4_t __a) +{ + return __builtin_aarch64_clrsbv4si (__a); +} + /* vclz. */ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) @@ -14156,6 +14062,44 @@ vclzq_u32 (uint32x4_t __a) return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a); } +/* vcnt. 
*/ + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vcnt_p8 (poly8x8_t __a) +{ + return (poly8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcnt_s8 (int8x8_t __a) +{ + return __builtin_aarch64_popcountv8qi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcnt_u8 (uint8x8_t __a) +{ + return (uint8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vcntq_p8 (poly8x16_t __a) +{ + return (poly8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vcntq_s8 (int8x16_t __a) +{ + return __builtin_aarch64_popcountv16qi (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcntq_u8 (uint8x16_t __a) +{ + return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a); +} + /* vcvt (double -> float). */ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) |