diff options
author | Alan Lawrence <alan.lawrence@arm.com> | 2014-09-05 09:58:21 +0000 |
---|---|---|
committer | Alan Lawrence <alalaw01@gcc.gnu.org> | 2014-09-05 09:58:21 +0000 |
commit | cf465d71a2938e7ee6aa705ed539060a4cb077bf (patch) | |
tree | 6cd5b612c461e56eb8d93e47add14cf8c94ee5b3 /gcc | |
parent | 08c13199cf5568393fd46481d99cf1729480548a (diff) | |
download | gcc-cf465d71a2938e7ee6aa705ed539060a4cb077bf.zip gcc-cf465d71a2938e7ee6aa705ed539060a4cb077bf.tar.gz gcc-cf465d71a2938e7ee6aa705ed539060a4cb077bf.tar.bz2 |
[PATCH AArch64] Add a builtin for rbit(q?)_p8; add intrinsics and tests.
gcc/:
* config/aarch64/aarch64-simd.md (aarch64_rbit<mode>): New pattern.
* config/aarch64/aarch64-simd-builtins.def (rbit): New builtin.
* config/aarch64/arm_neon.h (vrbit_s8, vrbit_u8, vrbitq_s8, vrbitq_u8):
Replace temporary asm with call to builtin.
(vrbit_p8, vrbitq_p8): New functions.
gcc/testsuite/:
* gcc.target/aarch64/simd/vrbit_1.c: New test.
From-SVN: r214943
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 9 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 82 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c | 56 |
6 files changed, 117 insertions, 44 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9c329a9..9f13d1e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2014-09-05 Alan Lawrence <alan.lawrence@arm.com> + + * config/aarch64/aarch64-simd.md (aarch64_rbit<mode>): New pattern. + * config/aarch64/aarch64-simd-builtins.def (rbit): New builtin. + * config/aarch64/arm_neon.h (vrbit_s8, vrbit_u8, vrbitq_s8, vrbitq_u8): + Replace temporary asm with call to builtin. + (vrbit_p8, vrbitq_p8): New functions. + 2014-09-05 Richard Biener <rguenther@suse.de> * cfgloop.c (mark_loop_for_removal): New function. diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 4f3bd12..aa19130 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -344,6 +344,8 @@ VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di) + BUILTIN_VB (UNOP, rbit, 0) + /* Implemented by aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */ BUILTIN_VALL (BINOP, zip1, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index c489636..8ffc75e 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -294,6 +294,15 @@ [(set_attr "type" "neon_rev<q>")] ) +(define_insn "aarch64_rbit<mode>" + [(set (match_operand:VB 0 "register_operand" "=w") + (unspec:VB [(match_operand:VB 1 "register_operand" "w")] + UNSPEC_RBIT))] + "TARGET_SIMD" + "rbit\\t%0.<Vbtype>, %1.<Vbtype>" + [(set_attr "type" "neon_rbit")] +) + (define_insn "*aarch64_mul3_elt<mode>" [(set (match_operand:VMUL 0 "register_operand" "=w") (mult:VMUL diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 0a86172..86926b9 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -10407,50 +10407,6 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) result; \ }) -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vrbit_s8 (int8x8_t a) -{ - int8x8_t result; - __asm__ ("rbit %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vrbit_u8 (uint8x8_t a) -{ - uint8x8_t result; - __asm__ ("rbit %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vrbitq_s8 (int8x16_t a) -{ - int8x16_t result; - __asm__ ("rbit %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vrbitq_u8 (uint8x16_t a) -{ - uint8x16_t result; - __asm__ ("rbit %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vrecpe_u32 (uint32x2_t a) { @@ -20781,6 +20737,44 @@ vqsubd_u64 (uint64_t __a, uint64_t __b) return __builtin_aarch64_uqsubdi_uuu (__a, __b); } +/* vrbit */ + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrbit_p8 (poly8x8_t __a) +{ + return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrbit_s8 (int8x8_t __a) +{ + return __builtin_aarch64_rbitv8qi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrbit_u8 (uint8x8_t __a) +{ + return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrbitq_p8 (poly8x16_t __a) +{ + return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrbitq_s8 (int8x16_t __a) +{ + return __builtin_aarch64_rbitv16qi (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrbitq_u8 (uint8x16_t __a) +{ + return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a); +} + /* vrecpe */ __extension__ static __inline float32_t __attribute__ ((__always_inline__)) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index af842fd..9ec19fa 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2014-09-05 Alan Lawrence <alan.lawrence@arm.com> + + * gcc.target/aarch64/simd/vrbit_1.c: New test. + 2014-09-05 Richard Biener <rguenther@suse.de> PR middle-end/63148 diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c new file mode 100644 index 0000000..77d13d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c @@ -0,0 +1,56 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ + +#include <arm_neon.h> + +extern void abort (void); + +uint64_t in1 = 0x0123456789abcdefULL; +uint64_t expected1 = 0x80c4a2e691d5b3f7ULL; + +#define TEST8(BASETYPE, SUFFIX) \ +void test8_##SUFFIX () \ +{ \ + BASETYPE##8x8_t out = vrbit_##SUFFIX (vcreate_##SUFFIX (in1)); \ + uint64_t res = vget_lane_u64 (vreinterpret_u64_##SUFFIX (out), 0); \ + if (res != expected1) abort (); \ +} + +uint64_t in2 = 0xdeadbeefcafebabeULL; +uint64_t expected2 = 0x7bb57df7537f5d7dULL; + +#define TEST16(BASETYPE, SUFFIX) \ +void test16_##SUFFIX () \ +{ \ + BASETYPE##8x16_t in = vcombine_##SUFFIX (vcreate_##SUFFIX (in1), \ + vcreate_##SUFFIX (in2)); \ + uint64x2_t res = vreinterpretq_u64_##SUFFIX (vrbitq_##SUFFIX (in)); \ + uint64_t res1 = vgetq_lane_u64 (res, 0); \ + uint64_t res2 = vgetq_lane_u64 (res, 1); \ + if (res1 != expected1 || res2 != expected2) abort (); \ +} + +TEST8 (poly, p8); +TEST8 (int, s8); +TEST8 (uint, u8); + +TEST16 (poly, p8); +TEST16 (int, s8); +TEST16 (uint, u8); + +int +main (int argc, char **argv) +{ + test8_p8 (); + test8_s8 (); + test8_u8 (); + test16_p8 (); + test16_s8 (); + test16_u8 (); + return 0; +} + +/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\]" 3 } } */ +/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\]" 3 } } */ + +/* { dg-final { cleanup-saved-temps } } */ |