diff options
author | Sean Anderson <sean.anderson@seco.com> | 2023-07-31 17:27:33 -0400 |
---|---|---|
committer | Tom Rini <trini@konsulko.com> | 2023-08-17 16:39:20 -0400 |
commit | 726a802fdaf1ffb4ca95ebf6910a738781137ef5 (patch) | |
tree | 7d2df62ee83092d7b8b6032f48a980b2a212df8c | |
parent | 14ba0a8bbc342a677982acc787ae542b84ed7993 (diff) | |
download | u-boot-726a802fdaf1ffb4ca95ebf6910a738781137ef5.zip u-boot-726a802fdaf1ffb4ca95ebf6910a738781137ef5.tar.gz u-boot-726a802fdaf1ffb4ca95ebf6910a738781137ef5.tar.bz2 |
arm: Use builtins for ffs/fls
Since ARMv5, the clz instruction allows for efficient implementation of
ffs/fls with builtins. Until ARMv7 (with Thumb-2), this instruction is
only available in ARM mode. LTO makes it difficult to force specific
functions to be in ARM mode, as it is effectively a form of very
aggressive inlining. To work around this, fls/ffs are implemented in
assembly for ARMv5 and ARMv6 when compiling U-Boot in Thumb mode.
Overall, this saves around 75 bytes per call.
This code is synced with v5.15 of the Linux kernel.
Signed-off-by: Sean Anderson <sean.anderson@seco.com>
Reviewed-by: Tom Rini <trini@konsulko.com>
-rw-r--r-- | arch/arm/include/asm/bitops.h | 27 | ||||
-rw-r--r-- | arch/arm/lib/Makefile | 5 | ||||
-rw-r--r-- | arch/arm/lib/bitops.S | 45 | ||||
-rw-r--r-- | include/asm-generic/bitops/builtin-__ffs.h | 16 | ||||
-rw-r--r-- | include/asm-generic/bitops/builtin-__fls.h | 16 | ||||
-rw-r--r-- | include/asm-generic/bitops/builtin-ffs.h | 15 | ||||
-rw-r--r-- | include/asm-generic/bitops/builtin-fls.h | 17 |
7 files changed, 140 insertions, 1 deletions
diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h index fa85486..8e89783 100644 --- a/arch/arm/include/asm/bitops.h +++ b/arch/arm/include/asm/bitops.h @@ -15,9 +15,34 @@ #ifndef __ASM_ARM_BITOPS_H #define __ASM_ARM_BITOPS_H +#if __LINUX_ARM_ARCH__ < 5 + #include <asm-generic/bitops/__ffs.h> #include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/fls.h> + +#else + +#define PLATFORM_FFS +#define PLATFORM_FLS + +#if !IS_ENABLED(CONFIG_HAS_THUMB2) && CONFIG_IS_ENABLED(SYS_THUMB_BUILD) + +unsigned long __fls(unsigned long word); +unsigned long __ffs(unsigned long word); +int fls(unsigned int x); +int ffs(int x); + +#else + +#include <asm-generic/bitops/builtin-__fls.h> +#include <asm-generic/bitops/builtin-__ffs.h> +#include <asm-generic/bitops/builtin-fls.h> +#include <asm-generic/bitops/builtin-ffs.h> + +#endif +#endif + #include <asm-generic/bitops/fls64.h> #ifdef __KERNEL__ @@ -113,7 +138,7 @@ static inline int test_bit(int nr, const void * addr) static inline int __ilog2(unsigned int x) { - return generic_fls(x) - 1; + return fls(x) - 1; } #define ffz(x) __ffs(~(x)) diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 62cf80f..b1bcd37 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -113,6 +113,11 @@ AFLAGS_REMOVE_memset.o := -mthumb -mthumb-interwork AFLAGS_REMOVE_memcpy.o := -mthumb -mthumb-interwork AFLAGS_memset.o := -DMEMSET_NO_THUMB_BUILD AFLAGS_memcpy.o := -DMEMCPY_NO_THUMB_BUILD + +# This is only necessary to force ARM mode on THUMB1 targets. +ifneq ($(CONFIG_SYS_ARM_ARCH),4) +obj-y += bitops.o +endif endif endif diff --git a/arch/arm/lib/bitops.S b/arch/arm/lib/bitops.S new file mode 100644 index 0000000..29d1524 --- /dev/null +++ b/arch/arm/lib/bitops.S @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Sean Anderson <sean.anderson@seco.com> + * + * ARM bitops to call when using THUMB1, which doesn't have these instructions. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +.pushsection .text.__fls +ENTRY(__fls) + clz r0, r0 + rsb r0, r0, #31 + ret lr +ENDPROC(__fls) +.popsection + +.pushsection .text.__ffs +ENTRY(__ffs) + rsb r3, r0, #0 + and r0, r0, r3 + clz r0, r0 + rsb r0, r0, #31 + ret lr +ENDPROC(__ffs) +.popsection + +.pushsection .text.fls +ENTRY(fls) + cmp r0, #0 + clzne r0, r0 + rsbne r0, r0, #32 + ret lr +ENDPROC(fls) +.popsection + +.pushsection .text.ffs +ENTRY(ffs) + rsb r3, r0, #0 + and r0, r0, r3 + clz r0, r0 + rsb r0, r0, #32 + ret lr +ENDPROC(ffs) +.popsection diff --git a/include/asm-generic/bitops/builtin-__ffs.h b/include/asm-generic/bitops/builtin-__ffs.h new file mode 100644 index 0000000..87024da --- /dev/null +++ b/include/asm-generic/bitops/builtin-__ffs.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_BITOPS_BUILTIN___FFS_H_ +#define _ASM_GENERIC_BITOPS_BUILTIN___FFS_H_ + +/** + * __ffs - find first bit in word. + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static __always_inline unsigned long __ffs(unsigned long word) +{ + return __builtin_ctzl(word); +} + +#endif diff --git a/include/asm-generic/bitops/builtin-__fls.h b/include/asm-generic/bitops/builtin-__fls.h new file mode 100644 index 0000000..43a5aa9 --- /dev/null +++ b/include/asm-generic/bitops/builtin-__fls.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_BITOPS_BUILTIN___FLS_H_ +#define _ASM_GENERIC_BITOPS_BUILTIN___FLS_H_ + +/** + * __fls - find last (most-significant) set bit in a long word + * @word: the word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +static __always_inline unsigned long __fls(unsigned long word) +{ + return (sizeof(word) * 8) - 1 - __builtin_clzl(word); +} + +#endif diff --git a/include/asm-generic/bitops/builtin-ffs.h b/include/asm-generic/bitops/builtin-ffs.h new file mode 100644 index 0000000..7b12932 --- /dev/null +++ b/include/asm-generic/bitops/builtin-ffs.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_BITOPS_BUILTIN_FFS_H_ +#define _ASM_GENERIC_BITOPS_BUILTIN_FFS_H_ + +/** + * ffs - find first bit set + * @x: the word to search + * + * This is defined the same way as + * the libc and compiler builtin ffs routines, therefore + * differs in spirit from ffz (man ffs). + */ +#define ffs(x) __builtin_ffs(x) + +#endif diff --git a/include/asm-generic/bitops/builtin-fls.h b/include/asm-generic/bitops/builtin-fls.h new file mode 100644 index 0000000..c8455cc --- /dev/null +++ b/include/asm-generic/bitops/builtin-fls.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_BITOPS_BUILTIN_FLS_H_ +#define _ASM_GENERIC_BITOPS_BUILTIN_FLS_H_ + +/** + * fls - find last (most-significant) bit set + * @x: the word to search + * + * This is defined the same way as ffs. + * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. + */ +static __always_inline int fls(unsigned int x) +{ + return x ? sizeof(x) * 8 - __builtin_clz(x) : 0; +} + +#endif |