diff options
author | Chris Sidebottom <chris.sidebottom@arm.com> | 2023-05-25 14:57:34 +0100 |
---|---|---|
committer | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-05-25 14:57:34 +0100 |
commit | f3dbc4112da318d1685a0833c7b3180589bbba2e (patch) | |
tree | 0bc8aa2b3107aba6f7a5fbaf53f6ecfc42d820cc /gcc | |
parent | f5298d9969b4fa34ff3aecd54b9630e22b2984a5 (diff) | |
download | gcc-f3dbc4112da318d1685a0833c7b3180589bbba2e.zip gcc-f3dbc4112da318d1685a0833c7b3180589bbba2e.tar.gz gcc-f3dbc4112da318d1685a0833c7b3180589bbba2e.tar.bz2 |
arm: Implement ACLE Data Intrinsics
This patch implements a number of scalar data processing intrinsics from ACLE
that were requested by some users. Some of these have fast single-instruction
sequences for Armv6 and later, but even for earlier versions they can still emit
an inline sequence or a call to libgcc (and ACLE recommends that they be unconditionally
available).
Chris Sidebottom wrote most of the patch; I just cleaned it up, wired up some builtins
and adjusted the tests.
Bootstrapped and tested on arm-none-linux-gnueabihf.
Co-authored-by: Chris Sidebottom <chris.sidebottom@arm.com>
gcc/ChangeLog:
* config/arm/arm.md (rbitsi2): Rename to...
(arm_rbit): ... This.
(ctzsi2): Adjust for the above.
(arm_rev16si2): Convert to define_expand.
(arm_rev16si2_alt1): New pattern.
(arm_rev16si2_alt): Rename to...
(*arm_rev16si2_alt2): ... This.
* config/arm/arm_acle.h (__ror, __rorl, __rorll, __clz, __clzl, __clzll,
__cls, __clsl, __clsll, __revsh, __rev, __revl, __revll, __rev16,
__rev16l, __rev16ll, __rbit, __rbitl, __rbitll): Define intrinsics.
* config/arm/arm_acle_builtins.def (rbit, rev16si2): Define builtins.
gcc/testsuite/ChangeLog:
* gcc.target/arm/acle/data-intrinsics-armv6.c: New test.
* gcc.target/arm/acle/data-intrinsics-assembly.c: New test.
* gcc.target/arm/acle/data-intrinsics-rbit.c: New test.
* gcc.target/arm/acle/data-intrinsics.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/arm/arm.md | 20 | ||||
-rw-r--r-- | gcc/config/arm/arm_acle.h | 64 | ||||
-rw-r--r-- | gcc/config/arm/arm_acle_builtins.def | 3 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/acle/data-intrinsics-armv6.c | 28 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/acle/data-intrinsics-assembly.c | 263 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/acle/data-intrinsics-rbit.c | 98 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/acle/data-intrinsics.c | 26 |
7 files changed, 498 insertions, 4 deletions
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 40c4d84..2c7249f 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -12180,7 +12180,7 @@ [(set_attr "predicable" "yes") (set_attr "type" "clz")]) -(define_insn "rbitsi2" +(define_insn "arm_rbit" [(set (match_operand:SI 0 "s_register_operand" "=r") (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")] UNSPEC_RBIT))] "TARGET_32BIT && arm_arch_thumb2" @@ -12200,7 +12200,7 @@ "&& reload_completed" [(const_int 0)] " - emit_insn (gen_rbitsi2 (operands[0], operands[1])); + emit_insn (gen_arm_rbit (operands[0], operands[1])); emit_insn (gen_clzsi2 (operands[0], operands[0])); DONE; ") @@ -12564,7 +12564,7 @@ ;; operations within an IOR/AND RTX, therefore we have two patterns matching ;; each valid permutation. -(define_insn "arm_rev16si2" +(define_insn "arm_rev16si2_alt1" [(set (match_operand:SI 0 "register_operand" "=l,l,r") (ior:SI (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "l,l,r") (const_int 8)) @@ -12581,7 +12581,7 @@ (set_attr "type" "rev")] ) -(define_insn "arm_rev16si2_alt" +(define_insn "*arm_rev16si2_alt2" [(set (match_operand:SI 0 "register_operand" "=l,l,r") (ior:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,l,r") (const_int 8)) @@ -12598,6 +12598,18 @@ (set_attr "type" "rev")] ) +(define_expand "arm_rev16si2" + [(set (match_operand:SI 0 "s_register_operand") + (bswap:SI (match_operand:SI 1 "s_register_operand")))] + "arm_arch6" + { + rtx left = gen_int_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00), SImode); + rtx right = gen_int_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff), SImode); + emit_insn (gen_arm_rev16si2_alt1 (operands[0], operands[1], right, left)); + DONE; + } +) + (define_expand "bswaphi2" [(set (match_operand:HI 0 "s_register_operand") (bswap:HI (match_operand:HI 1 "s_register_operand")))] diff --git a/gcc/config/arm/arm_acle.h b/gcc/config/arm/arm_acle.h index 4a5a6a8..0ebd250 100644 --- a/gcc/config/arm/arm_acle.h +++ 
b/gcc/config/arm/arm_acle.h @@ -28,10 +28,74 @@ #define _GCC_ARM_ACLE_H #include <stdint.h> + #ifdef __cplusplus extern "C" { #endif +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) \ +__extension__ extern __inline TYPE \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +NAME (TYPE __value, uint32_t __rotate) \ +{ \ + int __size = (int) sizeof (TYPE) * __CHAR_BIT__; \ + __rotate = __rotate % __size; \ + return __value >> __rotate | __value << ((__size - __rotate) % __size); \ +} + +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t) +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long) +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t) + +#undef _GCC_ARM_ACLE_ROR_FN + +#define _GCC_ARM_ACLE_DATA_FN(NAME, ITYPE, RTYPE) \ +__extension__ extern __inline RTYPE \ +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ +__##NAME (ITYPE __value) + +#define _GCC_ARM_ACLE_DATA_ALIAS(NAME, BUILTIN, ITYPE, RTYPE) \ +_GCC_ARM_ACLE_DATA_FN(NAME, ITYPE, RTYPE) \ +{ \ + return __builtin_##BUILTIN (__value); \ +} + +_GCC_ARM_ACLE_DATA_ALIAS (clz, clz, uint32_t, unsigned int) +_GCC_ARM_ACLE_DATA_ALIAS (clzl, clzl, unsigned long, unsigned int) +_GCC_ARM_ACLE_DATA_ALIAS (clzll, clzll, uint64_t, unsigned int) +_GCC_ARM_ACLE_DATA_ALIAS (cls, clrsb, uint32_t, unsigned int) +_GCC_ARM_ACLE_DATA_ALIAS (clsl, clrsbl, unsigned long, unsigned int) +_GCC_ARM_ACLE_DATA_ALIAS (clsll, clrsbll, uint64_t, unsigned int) +_GCC_ARM_ACLE_DATA_ALIAS (revsh, bswap16, int16_t, int16_t) +_GCC_ARM_ACLE_DATA_ALIAS (rev, bswap32, uint32_t, uint32_t) +_GCC_ARM_ACLE_DATA_ALIAS (revl, bswap32, unsigned long, unsigned long) +_GCC_ARM_ACLE_DATA_ALIAS (revll, bswap64, uint64_t, uint64_t) +#if __ARM_ARCH >= 6 +_GCC_ARM_ACLE_DATA_ALIAS (rev16, arm_rev16si2, uint32_t, uint32_t) +_GCC_ARM_ACLE_DATA_ALIAS (rev16l, arm_rev16si2, unsigned long, unsigned long) +#else +_GCC_ARM_ACLE_DATA_FN(rev16, uint32_t, uint32_t) { + return ((__value & 0xff00ff) << 8 | (__value & 0xff00ff00) >> 8); +} +_GCC_ARM_ACLE_DATA_FN(rev16l, 
unsigned long, unsigned long) { + return ((__value & 0xff00ff) << 8 | (__value & 0xff00ff00) >> 8); +} +#endif +_GCC_ARM_ACLE_DATA_FN(rev16ll, uint64_t, uint64_t) { + return __rev16l(__value) | (uint64_t)__rev16l(__value >> 32) << 32; +} + +#if __ARM_ARCH_6T2__ || __ARM_ARCH >= 7 +_GCC_ARM_ACLE_DATA_ALIAS (rbit, arm_rbit, uint32_t, uint32_t) +_GCC_ARM_ACLE_DATA_ALIAS (rbitl, arm_rbit, unsigned long, unsigned int) +_GCC_ARM_ACLE_DATA_FN(rbitll, uint64_t, uint64_t) { + return ((uint64_t)__rbit(__value) << 32) | __rbit(__value >> 32); +} +#endif + +#undef _GCC_ARM_ACLE_DATA_ALIAS +#undef _GCC_ARM_ACLE_DATA_FN + #if (!__thumb__ || __thumb2__) && __ARM_ARCH >= 4 __extension__ static __inline void __attribute__ ((__always_inline__)) __arm_cdp (const unsigned int __coproc, const unsigned int __opc1, diff --git a/gcc/config/arm/arm_acle_builtins.def b/gcc/config/arm/arm_acle_builtins.def index 48332eb..ff1afff 100644 --- a/gcc/config/arm/arm_acle_builtins.def +++ b/gcc/config/arm/arm_acle_builtins.def @@ -117,3 +117,6 @@ VAR1 (BINOP, smuadx, si) VAR1 (SAT_BINOP_UNSIGNED_IMM, ssat16, si) VAR1 (SAT_BINOP_UNSIGNED_IMM, usat16, si) + +VAR1 (BSWAP, rbit, si) +VAR1 (BSWAP, rev16si2, si) diff --git a/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-armv6.c b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-armv6.c new file mode 100644 index 0000000..aafdff3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-armv6.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_arch_v6_ok } */ +/* { dg-add-options arm_arch_v6t2 } */ + +#include "arm_acle.h" + +volatile uint32_t clz_in = 0x1234; +volatile uint32_t rev_in = 0x12345678; +volatile uint64_t rev64_in = 0x1234567890abcdef; + +int +main (int argc, char **argv) +{ + if (__clz(clz_in) != 19) { __builtin_abort(); } + if (__clzl(clz_in) != 19) { __builtin_abort(); } + if (__clzll(clz_in) != 51) { __builtin_abort(); } + if (__cls(clz_in) != 18) { __builtin_abort(); } + if (__clsl(clz_in) != 
18) { __builtin_abort(); } + if (__clsll(clz_in) != 50) { __builtin_abort(); } + if (__rev(rev_in) != 0x78563412) { __builtin_abort(); } + if (__revl(rev_in) != 0x78563412) { __builtin_abort(); } + if (__revll(rev64_in) != 0xefcdab9078563412) { __builtin_abort(); } + if (__rev16(rev_in) != 0x34127856) { __builtin_abort(); } + if (__rev16l(rev_in) != 0x34127856) { __builtin_abort(); } + if (__rev16ll(rev64_in) != 0x34127856ab90efcd) { __builtin_abort(); } + if (__revsh(clz_in) != 0x3412) { __builtin_abort(); } + return 0; +} diff --git a/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-assembly.c b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-assembly.c new file mode 100644 index 0000000..3e06687 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-assembly.c @@ -0,0 +1,263 @@ +/* Test the ACLE data intrinsics get expanded to the correct instructions on a specific architecture */ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_arch_v6_ok } */ +/* { dg-additional-options "--save-temps -O1" } */ +/* { dg-add-options arm_arch_v6 } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include "arm_acle.h" + +uint32_t *g32; +unsigned long *gul; +uint64_t *g64; +unsigned int *gui; +int16_t *g16; + +/* +** test_clz: +** clz r0, r0 +** bx lr +*/ + +unsigned int test_clz (uint32_t a) +{ + return __clz (a); +} + +/* +** test_clzl: +** clz r0, r0 +** bx lr +*/ + +unsigned int test_clzl (unsigned long a) +{ + return __clzl (a); +} + +/* +** test_cls: +** eor (r[0-9]+), r0, r0, asr #31 +** clz (r[0-9]+), \1 +** sub r0, \2, #1 +** bx lr +*/ + +unsigned int test_cls (uint32_t a) +{ + return __cls(a); +} + +/* +** test_clsl: +** eor (r[0-9]+), r0, r0, asr #31 +** clz (r[0-9]+), \1 +** sub r0, \2, #1 +** bx lr +*/ + +unsigned int test_clsl (unsigned long a) +{ + return __clsl (a); +} + +/* +** test_rev: +** rev r0, r0 +** bx lr +*/ + +uint32_t test_rev (uint32_t a) +{ + return __rev (a); +} + +/* +** test_revl: +** rev r0, r0 +** bx 
lr +*/ + +unsigned long test_revl (unsigned long a) +{ + return __revl (a); +} + +/* +** test_revll: +** mov (r[0-9]+), r0 +** rev r0, r1 +** rev r1, \1 +** bx lr +*/ + +uint64_t test_revll (uint64_t a) +{ + return __revll (a); +} + +/* +** test_ror: +** and (r[0-9]+), r1, #31 +** ror r0, r0, \1 +** bx lr +*/ + +uint32_t test_ror (uint32_t a, uint32_t r) +{ + return __ror (a, r); +} + +/* +** test_rorl: +** and (r[0-9]+), r1, #31 +** ror r0, r0, \1 +** bx lr +*/ + +unsigned long test_rorl (unsigned long a, uint32_t r) +{ + return __rorl (a, r); +} + +/* +** test_revsh: +** revsh r0, r0 +** bx lr +*/ + +int16_t test_revsh (int16_t a) +{ + return __revsh (a); +} + +/* +** test_clz_mem: +** ... +** clz r[0-9]+, r[0-9]+ +** ... +** bx lr +*/ + +void test_clz_mem (uint32_t *a) +{ + *gui = __clz (*a); +} + +/* +** test_clzl_mem: +** ... +** clz r[0-9]+, r[0-9]+ +** ... +** bx lr +*/ + +void test_clzl_mem (unsigned long *a) +{ + *gui = __clzl (*a); +} + +/* +** test_cls_mem: +** ... +** clz r[0-9]+, r[0-9]+ +** ... +** bx lr +*/ + +void test_cls_mem (uint32_t *a) +{ + *gui = __cls (*a); +} + +/* +** test_clsl_mem: +** ... +** clz r[0-9]+, r[0-9]+ +** ... +** bx lr +*/ + +void test_clsl_mem (unsigned long *a) +{ + *gui = __clsl (*a); +} + +/* +** test_rev_mem: +** ... +** rev r[0-9]+, r[0-9]+ +** ... +** bx lr +*/ + +void test_rev_mem (uint32_t *a) +{ + *g32 = __rev (*a); +} + +/* +** test_revl_mem: +** ... +** rev r[0-9]+, r[0-9]+ +** ... +** bx lr +*/ + +void test_revl_mem (unsigned long *a) +{ + *gul = __revl (*a); +} + +/* +** test_revll_mem: +** ... +** rev r[0-9]+, r[0-9]+ +** ... +** bx lr +*/ + +void test_revll_mem (uint64_t *a) +{ + *g64 = __revll (*a); +} + +/* +** test_ror_mem: +** ... +** ror r[0-9]+, r[0-9]+, r[0-9]+ +** ... +** bx lr +*/ + +void test_ror_mem (uint32_t *a, uint32_t *r) +{ + *g32 = __ror (*a, *r); +} + +/* +** test_rorl_mem: +** ... +** ror r[0-9]+, r[0-9]+, r[0-9]+ +** ... 
+** bx lr +*/ + +void test_rorl_mem (unsigned long *a, uint32_t *r) +{ + *gul = __rorl (*a, *r); +} + +/* +** test_revsh_mem: +** ... +** rev16 r[0-9]+, r[0-9]+ +** ... +** bx lr +*/ + +void test_revsh_mem (int16_t *a) +{ + *g16 = __revsh (*a); +} + diff --git a/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-rbit.c b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-rbit.c new file mode 100644 index 0000000..d1fe274 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-rbit.c @@ -0,0 +1,98 @@ +/* Test the ACLE data intrinsics existence for specific instruction. */ +/* { dg-do run } */ +/* { dg-require-effective-target arm_arch_v6t2_ok } */ +/* { dg-additional-options "--save-temps -O1" } */ +/* { dg-add-options arm_arch_v6t2 } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include "arm_acle.h" + +extern void abort (void); + +uint32_t *g32; +unsigned long *gul; +uint64_t *g64; + +/* +** test_rbit: +** rbit r0, r0 +** bx lr +*/ + +uint32_t test_rbit (uint32_t a) +{ + return __rbit (a); +} + +/* +** test_rbitl: +** rbit r0, r0 +** bx lr +*/ + +unsigned long test_rbitl (unsigned long a) +{ + return __rbitl (a); +} + +/* +** test_rbitll: +** mov (r[0-9]+), r0 +** rbit r0, r1 +** rbit r1, \1 +** bx lr +*/ + +uint64_t test_rbitll (uint64_t a) +{ + return __rbitll (a); +} + +/* +** test_rbit_mem: +** ... +** rbit r[0-9]+, r[0-9]+ +** ... +** bx lr +*/ + +void test_rbit_mem (uint32_t *a) +{ + *g32 = __rbit (*a); +} + +/* +** test_rbitl_mem: +** ... +** rbit r[0-9]+, r[0-9]+ +** ... +** bx lr +*/ + +void test_rbitl_mem (unsigned long *a) +{ + *gul = __rbitl (*a); +} + +/* +** test_rbitll_mem: +** ... +** rbit r[0-9]+, r[0-9]+ +** ... 
+** bx lr +*/ + +void test_rbitll_mem (uint64_t *a) +{ + *g64 = __rbitll (*a); +} + +int +main (int argc, char **argv) +{ + if (__rbit(0x12345678) != 0x1e6a2c48) { abort(); } + if (__rbitl(0x12345678) != 0x1e6a2c48) { abort(); } + if (__rbitll(0x1234567890abcdef) != 0xf7b3d5091e6a2c48) { abort(); } + return 0; +} + diff --git a/gcc/testsuite/gcc.target/arm/acle/data-intrinsics.c b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics.c new file mode 100644 index 0000000..5c05b34 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include "arm_acle.h" + +volatile uint32_t clz_in = 0x1234; +volatile uint32_t rev_in = 0x12345678; +volatile uint64_t rev64_in = 0x1234567890abcdef; + +int +main (int argc, char **argv) +{ + if (__clz(clz_in) != 19) { __builtin_abort(); } + if (__clzl(clz_in) != 19) { __builtin_abort(); } + if (__clzll(clz_in) != 51) { __builtin_abort(); } + if (__cls(clz_in) != 18) { __builtin_abort(); } + if (__clsl(clz_in) != 18) { __builtin_abort(); } + if (__clsll(clz_in) != 50) { __builtin_abort(); } + if (__rev(rev_in) != 0x78563412) { __builtin_abort(); } + if (__revl(rev_in) != 0x78563412) { __builtin_abort(); } + if (__revll(rev64_in) != 0xefcdab9078563412) { __builtin_abort(); } + if (__rev16(rev_in) != 0x34127856) { __builtin_abort(); } + if (__rev16l(rev_in) != 0x34127856) { __builtin_abort(); } + if (__rev16ll(rev64_in) != 0x34127856ab90efcd) { __builtin_abort(); } + if (__revsh(clz_in) != 0x3412) { __builtin_abort(); } + return 0; +} |