aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorChris Sidebottom <chris.sidebottom@arm.com>2023-05-25 14:57:34 +0100
committerKyrylo Tkachov <kyrylo.tkachov@arm.com>2023-05-25 14:57:34 +0100
commitf3dbc4112da318d1685a0833c7b3180589bbba2e (patch)
tree0bc8aa2b3107aba6f7a5fbaf53f6ecfc42d820cc /gcc
parentf5298d9969b4fa34ff3aecd54b9630e22b2984a5 (diff)
downloadgcc-f3dbc4112da318d1685a0833c7b3180589bbba2e.zip
gcc-f3dbc4112da318d1685a0833c7b3180589bbba2e.tar.gz
gcc-f3dbc4112da318d1685a0833c7b3180589bbba2e.tar.bz2
arm: Implement ACLE Data Intrinsics
This patch implements a number of scalar data processing intrinsics from ACLE that were requested by some users. Some of these have fast single-instruction sequences for Armv6 and later, but even for earlier versions they can still emit an inline sequence or a call to libgcc (and ACLE recommends them being unconditionally available). Chris Sidebottom wrote most of the patch, I just cleaned it up, wired up some builtins and adjusted the tests. Bootstrapped and tested on arm-none-linux-gnueabihf. Co-authored-by: Chris Sidebottom <chris.sidebottom@arm.com> gcc/ChangeLog: * config/arm/arm.md (rbitsi2): Rename to... (arm_rbit): ... This. (ctzsi2): Adjust for the above. (arm_rev16si2): Convert to define_expand. (arm_rev16si2_alt1): New pattern. (arm_rev16si2_alt): Rename to... (*arm_rev16si2_alt2): ... This. * config/arm/arm_acle.h (__ror, __rorl, __rorll, __clz, __clzl, __clzll, __cls, __clsl, __clsll, __revsh, __rev, __revl, __revll, __rev16, __rev16l, __rev16ll, __rbit, __rbitl, __rbitll): Define intrinsics. * config/arm/arm_acle_builtins.def (rbit, rev16si2): Define builtins. gcc/testsuite/ChangeLog: * gcc.target/arm/acle/data-intrinsics-armv6.c: New test. * gcc.target/arm/acle/data-intrinsics-assembly.c: New test. * gcc.target/arm/acle/data-intrinsics-rbit.c: New test. * gcc.target/arm/acle/data-intrinsics.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/arm/arm.md20
-rw-r--r--gcc/config/arm/arm_acle.h64
-rw-r--r--gcc/config/arm/arm_acle_builtins.def3
-rw-r--r--gcc/testsuite/gcc.target/arm/acle/data-intrinsics-armv6.c28
-rw-r--r--gcc/testsuite/gcc.target/arm/acle/data-intrinsics-assembly.c263
-rw-r--r--gcc/testsuite/gcc.target/arm/acle/data-intrinsics-rbit.c98
-rw-r--r--gcc/testsuite/gcc.target/arm/acle/data-intrinsics.c26
7 files changed, 498 insertions, 4 deletions
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 40c4d84..2c7249f 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -12180,7 +12180,7 @@
[(set_attr "predicable" "yes")
(set_attr "type" "clz")])
-(define_insn "rbitsi2"
+(define_insn "arm_rbit"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(unspec:SI [(match_operand:SI 1 "s_register_operand" "r")] UNSPEC_RBIT))]
"TARGET_32BIT && arm_arch_thumb2"
@@ -12200,7 +12200,7 @@
"&& reload_completed"
[(const_int 0)]
"
- emit_insn (gen_rbitsi2 (operands[0], operands[1]));
+ emit_insn (gen_arm_rbit (operands[0], operands[1]));
emit_insn (gen_clzsi2 (operands[0], operands[0]));
DONE;
")
@@ -12564,7 +12564,7 @@
;; operations within an IOR/AND RTX, therefore we have two patterns matching
;; each valid permutation.
-(define_insn "arm_rev16si2"
+(define_insn "arm_rev16si2_alt1"
[(set (match_operand:SI 0 "register_operand" "=l,l,r")
(ior:SI (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "l,l,r")
(const_int 8))
@@ -12581,7 +12581,7 @@
(set_attr "type" "rev")]
)
-(define_insn "arm_rev16si2_alt"
+(define_insn "*arm_rev16si2_alt2"
[(set (match_operand:SI 0 "register_operand" "=l,l,r")
(ior:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,l,r")
(const_int 8))
@@ -12598,6 +12598,18 @@
(set_attr "type" "rev")]
)
+(define_expand "arm_rev16si2"
+ [(set (match_operand:SI 0 "s_register_operand")
+ (bswap:SI (match_operand:SI 1 "s_register_operand")))]
+ "arm_arch6"
+ { /* Expand by emitting the arm_rev16si2_alt1 insn with explicit mask constants.  */
+ rtx left = gen_int_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00), SImode); /* SImode mask; presumably 0xff00ff00 once narrowed to 32 bits - confirm.  */
+ rtx right = gen_int_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff), SImode); /* SImode mask; presumably 0x00ff00ff once narrowed to 32 bits - confirm.  */
+ emit_insn (gen_arm_rev16si2_alt1 (operands[0], operands[1], right, left)); /* NOTE(review): masks are passed as (right, left); the full alt1 pattern is not visible here, so confirm this matches its operands 2 and 3.  */
+ DONE; /* The insn was emitted by hand above, so expansion finishes here.  */
+ }
+)
+
(define_expand "bswaphi2"
[(set (match_operand:HI 0 "s_register_operand")
(bswap:HI (match_operand:HI 1 "s_register_operand")))]
diff --git a/gcc/config/arm/arm_acle.h b/gcc/config/arm/arm_acle.h
index 4a5a6a8..0ebd250 100644
--- a/gcc/config/arm/arm_acle.h
+++ b/gcc/config/arm/arm_acle.h
@@ -28,10 +28,74 @@
#define _GCC_ARM_ACLE_H
#include <stdint.h>
+
#ifdef __cplusplus
extern "C" {
#endif
+#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) /* Defines NAME (value, rotate): rotate a TYPE value right.  */ \
+__extension__ extern __inline TYPE \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+NAME (TYPE __value, uint32_t __rotate) \
+{ \
+ int __size = (int) sizeof (TYPE) * __CHAR_BIT__; /* Width of TYPE in bits.  */ \
+ __rotate = __rotate % __size; /* Reduce the rotate count modulo the width.  */ \
+ return __value >> __rotate | __value << ((__size - __rotate) % __size); /* The second modulo makes the left shift 0, not the full width, when __rotate is 0.  */ \
+}
+/* Rotate-right intrinsics for uint32_t, unsigned long and uint64_t operands.  */
+_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t)
+_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long)
+_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t)
+
+#undef _GCC_ARM_ACLE_ROR_FN
+
+#define _GCC_ARM_ACLE_DATA_FN(NAME, ITYPE, RTYPE) /* Emits the header of always-inline __NAME (ITYPE __value) returning RTYPE; the body follows the macro use.  */ \
+__extension__ extern __inline RTYPE \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+__##NAME (ITYPE __value)
+/* _GCC_ARM_ACLE_DATA_ALIAS defines __NAME as a direct wrapper around __builtin_BUILTIN.  */
+#define _GCC_ARM_ACLE_DATA_ALIAS(NAME, BUILTIN, ITYPE, RTYPE) \
+_GCC_ARM_ACLE_DATA_FN(NAME, ITYPE, RTYPE) \
+{ \
+ return __builtin_##BUILTIN (__value); \
+}
+/* clz* map to __builtin_clz*, cls* to __builtin_clrsb*, rev* to __builtin_bswap*.  */
+_GCC_ARM_ACLE_DATA_ALIAS (clz, clz, uint32_t, unsigned int) /* NOTE(review): __builtin_clz is documented as undefined for 0, while ACLE defines __clz (0) as the operand width - confirm whether a zero guard is needed (also for clzl/clzll).  */
+_GCC_ARM_ACLE_DATA_ALIAS (clzl, clzl, unsigned long, unsigned int)
+_GCC_ARM_ACLE_DATA_ALIAS (clzll, clzll, uint64_t, unsigned int)
+_GCC_ARM_ACLE_DATA_ALIAS (cls, clrsb, uint32_t, unsigned int)
+_GCC_ARM_ACLE_DATA_ALIAS (clsl, clrsbl, unsigned long, unsigned int)
+_GCC_ARM_ACLE_DATA_ALIAS (clsll, clrsbll, uint64_t, unsigned int)
+_GCC_ARM_ACLE_DATA_ALIAS (revsh, bswap16, int16_t, int16_t)
+_GCC_ARM_ACLE_DATA_ALIAS (rev, bswap32, uint32_t, uint32_t)
+_GCC_ARM_ACLE_DATA_ALIAS (revl, bswap32, unsigned long, unsigned long) /* Uses the 32-bit byte swap; assumes unsigned long is 32 bits wide on this target - TODO confirm.  */
+_GCC_ARM_ACLE_DATA_ALIAS (revll, bswap64, uint64_t, uint64_t)
+#if __ARM_ARCH >= 6 /* From Armv6 on, use the arm_rev16si2 builtin.  */
+_GCC_ARM_ACLE_DATA_ALIAS (rev16, arm_rev16si2, uint32_t, uint32_t)
+_GCC_ARM_ACLE_DATA_ALIAS (rev16l, arm_rev16si2, unsigned long, unsigned long)
+#else /* Otherwise swap the two bytes of each 16-bit halfword with masks and shifts.  */
+_GCC_ARM_ACLE_DATA_FN(rev16, uint32_t, uint32_t) {
+ return ((__value & 0xff00ff) << 8 | (__value & 0xff00ff00) >> 8); /* Low bytes move up, high bytes move down.  */
+}
+_GCC_ARM_ACLE_DATA_FN(rev16l, unsigned long, unsigned long) {
+ return ((__value & 0xff00ff) << 8 | (__value & 0xff00ff00) >> 8); /* Same 32-bit masks as __rev16; assumes unsigned long is 32 bits wide - TODO confirm.  */
+}
+#endif
+_GCC_ARM_ACLE_DATA_FN(rev16ll, uint64_t, uint64_t) { /* 64-bit variant built from two __rev16l calls.  */
+ return __rev16l(__value) | (uint64_t)__rev16l(__value >> 32) << 32; /* Each 32-bit half is processed separately and stays in its own half of the result.  */
+}
+
+#if __ARM_ARCH_6T2__ || __ARM_ARCH >= 7 /* Bit-reverse intrinsics; only defined for Armv6T2 and Armv7 or later.  */
+_GCC_ARM_ACLE_DATA_ALIAS (rbit, arm_rbit, uint32_t, uint32_t)
+_GCC_ARM_ACLE_DATA_ALIAS (rbitl, arm_rbit, unsigned long, unsigned long) /* Returns unsigned long, matching its operand type as __revl and __rev16l do.  */
+_GCC_ARM_ACLE_DATA_FN(rbitll, uint64_t, uint64_t) { /* 64-bit variant built from two __rbit calls.  */
+ return ((uint64_t)__rbit(__value) << 32) | __rbit(__value >> 32); /* Reverse each 32-bit half and exchange the halves.  */
+}
+#endif
+
+#undef _GCC_ARM_ACLE_DATA_ALIAS
+#undef _GCC_ARM_ACLE_DATA_FN
+
#if (!__thumb__ || __thumb2__) && __ARM_ARCH >= 4
__extension__ static __inline void __attribute__ ((__always_inline__))
__arm_cdp (const unsigned int __coproc, const unsigned int __opc1,
diff --git a/gcc/config/arm/arm_acle_builtins.def b/gcc/config/arm/arm_acle_builtins.def
index 48332eb..ff1afff 100644
--- a/gcc/config/arm/arm_acle_builtins.def
+++ b/gcc/config/arm/arm_acle_builtins.def
@@ -117,3 +117,6 @@ VAR1 (BINOP, smuadx, si)
VAR1 (SAT_BINOP_UNSIGNED_IMM, ssat16, si)
VAR1 (SAT_BINOP_UNSIGNED_IMM, usat16, si)
+
+VAR1 (BSWAP, rbit, si) /* Builtin for the bit-reverse intrinsics; presumably what arm_acle.h calls as __builtin_arm_rbit - confirm.  */
+VAR1 (BSWAP, rev16si2, si) /* Builtin for the rev16 intrinsics; presumably what arm_acle.h calls as __builtin_arm_rev16si2 - confirm.  */
diff --git a/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-armv6.c b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-armv6.c
new file mode 100644
index 0000000..aafdff3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-armv6.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_arch_v6t2_ok } */
+/* { dg-add-options arm_arch_v6t2 } */
+/* Run-time check of the ACLE data intrinsics; the required target matches the arm_arch_v6t2 options added above.  */
+#include "arm_acle.h"
+/* Inputs are volatile, presumably so the intrinsics are evaluated at run time rather than folded.  */
+volatile uint32_t clz_in = 0x1234;
+volatile uint32_t rev_in = 0x12345678;
+volatile uint64_t rev64_in = 0x1234567890abcdef;
+
+int
+main (int argc, char **argv)
+{
+ if (__clz(clz_in) != 19) { __builtin_abort(); }
+ if (__clzl(clz_in) != 19) { __builtin_abort(); }
+ if (__clzll(clz_in) != 51) { __builtin_abort(); }
+ if (__cls(clz_in) != 18) { __builtin_abort(); }
+ if (__clsl(clz_in) != 18) { __builtin_abort(); }
+ if (__clsll(clz_in) != 50) { __builtin_abort(); }
+ if (__rev(rev_in) != 0x78563412) { __builtin_abort(); }
+ if (__revl(rev_in) != 0x78563412) { __builtin_abort(); }
+ if (__revll(rev64_in) != 0xefcdab9078563412) { __builtin_abort(); }
+ if (__rev16(rev_in) != 0x34127856) { __builtin_abort(); }
+ if (__rev16l(rev_in) != 0x34127856) { __builtin_abort(); }
+ if (__rev16ll(rev64_in) != 0x34127856ab90efcd) { __builtin_abort(); }
+ if (__revsh(clz_in) != 0x3412) { __builtin_abort(); }
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-assembly.c b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-assembly.c
new file mode 100644
index 0000000..3e06687
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-assembly.c
@@ -0,0 +1,263 @@
+/* Test that the ACLE data intrinsics are expanded to the correct instructions on a specific architecture.  */
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_arch_v6_ok } */
+/* { dg-additional-options "--save-temps -O1" } */
+/* { dg-add-options arm_arch_v6 } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include "arm_acle.h"
+/* Destinations written by the *_mem tests further down.  */
+uint32_t *g32;
+unsigned long *gul;
+uint64_t *g64;
+unsigned int *gui;
+int16_t *g16;
+/* Register-operand tests: the expected body lists every instruction of the function.  */
+/*
+** test_clz:
+** clz r0, r0
+** bx lr
+*/
+
+unsigned int test_clz (uint32_t a)
+{
+ return __clz (a);
+}
+
+/*
+** test_clzl:
+** clz r0, r0
+** bx lr
+*/
+
+unsigned int test_clzl (unsigned long a)
+{
+ return __clzl (a);
+}
+
+/*
+** test_cls:
+** eor (r[0-9]+), r0, r0, asr #31
+** clz (r[0-9]+), \1
+** sub r0, \2, #1
+** bx lr
+*/
+
+unsigned int test_cls (uint32_t a)
+{
+ return __cls(a);
+}
+
+/*
+** test_clsl:
+** eor (r[0-9]+), r0, r0, asr #31
+** clz (r[0-9]+), \1
+** sub r0, \2, #1
+** bx lr
+*/
+
+unsigned int test_clsl (unsigned long a)
+{
+ return __clsl (a);
+}
+
+/*
+** test_rev:
+** rev r0, r0
+** bx lr
+*/
+
+uint32_t test_rev (uint32_t a)
+{
+ return __rev (a);
+}
+
+/*
+** test_revl:
+** rev r0, r0
+** bx lr
+*/
+
+unsigned long test_revl (unsigned long a)
+{
+ return __revl (a);
+}
+
+/*
+** test_revll:
+** mov (r[0-9]+), r0
+** rev r0, r1
+** rev r1, \1
+** bx lr
+*/
+
+uint64_t test_revll (uint64_t a)
+{
+ return __revll (a);
+}
+
+/*
+** test_ror:
+** and (r[0-9]+), r1, #31
+** ror r0, r0, \1
+** bx lr
+*/
+
+uint32_t test_ror (uint32_t a, uint32_t r)
+{
+ return __ror (a, r);
+}
+
+/*
+** test_rorl:
+** and (r[0-9]+), r1, #31
+** ror r0, r0, \1
+** bx lr
+*/
+
+unsigned long test_rorl (unsigned long a, uint32_t r)
+{
+ return __rorl (a, r);
+}
+
+/*
+** test_revsh:
+** revsh r0, r0
+** bx lr
+*/
+
+int16_t test_revsh (int16_t a)
+{
+ return __revsh (a);
+}
+/* Memory-operand tests: the "..." lines leave the surrounding instructions unchecked, so only the key instruction and the return are matched.  */
+/*
+** test_clz_mem:
+** ...
+** clz r[0-9]+, r[0-9]+
+** ...
+** bx lr
+*/
+
+void test_clz_mem (uint32_t *a)
+{
+ *gui = __clz (*a);
+}
+
+/*
+** test_clzl_mem:
+** ...
+** clz r[0-9]+, r[0-9]+
+** ...
+** bx lr
+*/
+
+void test_clzl_mem (unsigned long *a)
+{
+ *gui = __clzl (*a);
+}
+
+/*
+** test_cls_mem:
+** ...
+** clz r[0-9]+, r[0-9]+
+** ...
+** bx lr
+*/
+
+void test_cls_mem (uint32_t *a)
+{
+ *gui = __cls (*a);
+}
+
+/*
+** test_clsl_mem:
+** ...
+** clz r[0-9]+, r[0-9]+
+** ...
+** bx lr
+*/
+
+void test_clsl_mem (unsigned long *a)
+{
+ *gui = __clsl (*a);
+}
+
+/*
+** test_rev_mem:
+** ...
+** rev r[0-9]+, r[0-9]+
+** ...
+** bx lr
+*/
+
+void test_rev_mem (uint32_t *a)
+{
+ *g32 = __rev (*a);
+}
+
+/*
+** test_revl_mem:
+** ...
+** rev r[0-9]+, r[0-9]+
+** ...
+** bx lr
+*/
+
+void test_revl_mem (unsigned long *a)
+{
+ *gul = __revl (*a);
+}
+
+/*
+** test_revll_mem:
+** ...
+** rev r[0-9]+, r[0-9]+
+** ...
+** bx lr
+*/
+
+void test_revll_mem (uint64_t *a)
+{
+ *g64 = __revll (*a);
+}
+
+/*
+** test_ror_mem:
+** ...
+** ror r[0-9]+, r[0-9]+, r[0-9]+
+** ...
+** bx lr
+*/
+
+void test_ror_mem (uint32_t *a, uint32_t *r)
+{
+ *g32 = __ror (*a, *r);
+}
+
+/*
+** test_rorl_mem:
+** ...
+** ror r[0-9]+, r[0-9]+, r[0-9]+
+** ...
+** bx lr
+*/
+
+void test_rorl_mem (unsigned long *a, uint32_t *r)
+{
+ *gul = __rorl (*a, *r);
+}
+
+/*
+** test_revsh_mem:
+** ...
+** rev16 r[0-9]+, r[0-9]+
+** ...
+** bx lr
+*/
+
+void test_revsh_mem (int16_t *a)
+{
+ *g16 = __revsh (*a);
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-rbit.c b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-rbit.c
new file mode 100644
index 0000000..d1fe274
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-rbit.c
@@ -0,0 +1,98 @@
+/* Test that the ACLE bit-reverse intrinsics exist and expand to the rbit instruction.  */
+/* { dg-do run } */
+/* { dg-require-effective-target arm_arch_v6t2_ok } */
+/* { dg-additional-options "--save-temps -O1" } */
+/* { dg-add-options arm_arch_v6t2 } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include "arm_acle.h"
+
+extern void abort (void);
+/* Destinations written by the *_mem tests further down.  */
+uint32_t *g32;
+unsigned long *gul;
+uint64_t *g64;
+
+/*
+** test_rbit:
+** rbit r0, r0
+** bx lr
+*/
+
+uint32_t test_rbit (uint32_t a)
+{
+ return __rbit (a);
+}
+
+/*
+** test_rbitl:
+** rbit r0, r0
+** bx lr
+*/
+
+unsigned long test_rbitl (unsigned long a)
+{
+ return __rbitl (a);
+}
+
+/*
+** test_rbitll:
+** mov (r[0-9]+), r0
+** rbit r0, r1
+** rbit r1, \1
+** bx lr
+*/
+
+uint64_t test_rbitll (uint64_t a)
+{
+ return __rbitll (a);
+}
+
+/*
+** test_rbit_mem:
+** ...
+** rbit r[0-9]+, r[0-9]+
+** ...
+** bx lr
+*/
+
+void test_rbit_mem (uint32_t *a)
+{
+ *g32 = __rbit (*a);
+}
+
+/*
+** test_rbitl_mem:
+** ...
+** rbit r[0-9]+, r[0-9]+
+** ...
+** bx lr
+*/
+
+void test_rbitl_mem (unsigned long *a)
+{
+ *gul = __rbitl (*a);
+}
+
+/*
+** test_rbitll_mem:
+** ...
+** rbit r[0-9]+, r[0-9]+
+** ...
+** bx lr
+*/
+
+void test_rbitll_mem (uint64_t *a)
+{
+ *g64 = __rbitll (*a);
+}
+/* Run-time check: each intrinsic must return the bit-reversed constant.  */
+int
+main (int argc, char **argv)
+{
+ if (__rbit(0x12345678) != 0x1e6a2c48) { abort(); }
+ if (__rbitl(0x12345678) != 0x1e6a2c48) { abort(); }
+ if (__rbitll(0x1234567890abcdef) != 0xf7b3d5091e6a2c48) { abort(); }
+ return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/acle/data-intrinsics.c b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics.c
new file mode 100644
index 0000000..5c05b34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* Run-time check of the ACLE data intrinsics; this file adds no architecture options of its own.  */
+#include "arm_acle.h"
+/* Inputs are volatile, presumably so the intrinsics are evaluated at run time rather than folded.  */
+volatile uint32_t clz_in = 0x1234;
+volatile uint32_t rev_in = 0x12345678;
+volatile uint64_t rev64_in = 0x1234567890abcdef;
+
+int
+main (int argc, char **argv)
+{
+ if (__clz(clz_in) != 19) { __builtin_abort(); }
+ if (__clzl(clz_in) != 19) { __builtin_abort(); }
+ if (__clzll(clz_in) != 51) { __builtin_abort(); }
+ if (__cls(clz_in) != 18) { __builtin_abort(); }
+ if (__clsl(clz_in) != 18) { __builtin_abort(); }
+ if (__clsll(clz_in) != 50) { __builtin_abort(); }
+ if (__rev(rev_in) != 0x78563412) { __builtin_abort(); }
+ if (__revl(rev_in) != 0x78563412) { __builtin_abort(); }
+ if (__revll(rev64_in) != 0xefcdab9078563412) { __builtin_abort(); }
+ if (__rev16(rev_in) != 0x34127856) { __builtin_abort(); }
+ if (__rev16l(rev_in) != 0x34127856) { __builtin_abort(); }
+ if (__rev16ll(rev64_in) != 0x34127856ab90efcd) { __builtin_abort(); }
+ if (__revsh(clz_in) != 0x3412) { __builtin_abort(); }
+ return 0;
+}