diff options
author | Wilco Dijkstra <wilco.dijkstra@arm.com> | 2022-11-15 14:38:55 +0000 |
---|---|---|
committer | Wilco Dijkstra <wilco.dijkstra@arm.com> | 2022-11-15 15:08:33 +0000 |
commit | d1288d850944f69a795e4ff444a427eba3fec11b (patch) | |
tree | 0357dd0e95cb2e1825217396f7ff76fe6b1a377b /libatomic/config | |
parent | 5925f0ec54ab5ed773935eec09a602f58fa0ca2c (diff) | |
download | gcc-d1288d850944f69a795e4ff444a427eba3fec11b.zip gcc-d1288d850944f69a795e4ff444a427eba3fec11b.tar.gz gcc-d1288d850944f69a795e4ff444a427eba3fec11b.tar.bz2 |
libatomic: Add support for LSE and LSE2
Add support for AArch64 LSE and LSE2 to libatomic. Disable outline atomics,
and use LSE ifuncs for 1-8 byte atomics and LSE2 ifuncs for 16-byte atomics.
On Neoverse V1, 16-byte atomics are ~4x faster due to avoiding locks.
Note this is safe since we swap all 16-byte atomics using the same ifunc,
so they either use locks or LSE2 atomics, but never a mix. This also improves
ABI compatibility with LLVM: its inlined 16-byte atomics are compatible with
the new libatomic if LSE2 is supported.
libatomic/
* Makefile.in: Regenerated with automake 1.15.1.
* Makefile.am: Add atomic_16.S for AArch64.
* configure.tgt: Disable outline atomics in AArch64 build.
* config/linux/aarch64/atomic_16.S: New file - implementation of
ifuncs for 16-byte atomics.
* config/linux/aarch64/host-config.h: Enable ifuncs, use LSE
(HWCAP_ATOMICS) for 1-8-byte atomics and LSE2 (HWCAP_USCAT) for
16-byte atomics.
Diffstat (limited to 'libatomic/config')
-rw-r--r-- | libatomic/config/linux/aarch64/atomic_16.S | 485 | ||||
-rw-r--r-- | libatomic/config/linux/aarch64/host-config.h | 18 |
2 files changed, 498 insertions, 5 deletions
diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S
new file mode 100644
index 0000000..bced729
--- /dev/null
+++ b/libatomic/config/linux/aarch64/atomic_16.S
@@ -0,0 +1,485 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+
+   This file is part of the GNU Atomic Library (libatomic).
+
+   Libatomic is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libatomic is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+        .arch armv8-a+lse
+
+#define ENTRY(name)             \
+        .global name;           \
+        .hidden name;           \
+        .type name,%function;   \
+        .p2align 4;             \
+name:                           \
+        .cfi_startproc;         \
+        hint    34  // bti c
+
+#define END(name)               \
+        .cfi_endproc;           \
+        .size name, .-name;
+
+/* Register roles.  res0/res1 alias x0/x1, so every routine that still needs
+   the pointer after writing a result first copies it to x5.  in0/in1 hold a
+   16-byte operand, tmp0/tmp1 are scratch, and exp0/exp1 form the even/odd
+   register pair used as the CASP compare operand.  */
+#define res0 x0
+#define res1 x1
+#define in0  x2
+#define in1  x3
+#define tmp0 x6
+#define tmp1 x7
+#define exp0 x8
+#define exp1 x9
+
+/* Low/high 64-bit halves of each pair; which register holds the low half
+   depends on endianness.  Only add/sub use these, for carry propagation.  */
+#ifdef __AARCH64EB__
+# define reslo x1
+# define reshi x0
+# define inlo  x3
+# define inhi  x2
+# define tmplo x7
+# define tmphi x6
+#else
+# define reslo x0
+# define reshi x1
+# define inlo  x2
+# define inhi  x3
+# define tmplo x6
+# define tmphi x7
+#endif
+
+/* Values of the memory-model argument tested by the routines below.  */
+#define RELAXED 0
+#define CONSUME 1
+#define ACQUIRE 2
+#define RELEASE 3
+#define ACQ_REL 4
+#define SEQ_CST 5
+
+
+/* 16-byte atomic load.  x0 = address, w1 = memory model; result in x0/x1.  */
+ENTRY (libat_load_16_i1)
+        cbnz    w1, 1f
+        ldp     res0, res1, [x0]        /* RELAXED.  */
+        ret
+1:
+        cmp     w1, ACQUIRE
+        b.hi    2f
+        ldp     res0, res1, [x0]        /* CONSUME/ACQUIRE.  */
+        dmb     ishld
+        ret
+2:
+        ldar    tmp0, [x0]              /* Order after earlier store-release.  */
+        ldp     res0, res1, [x0]        /* Models above ACQUIRE (SEQ_CST).  */
+        dmb     ish
+        ret
+END (libat_load_16_i1)
+
+
+/* 16-byte atomic store.  x0 = address, in0/in1 = value, w4 = memory model.  */
+ENTRY (libat_store_16_i1)
+        cbnz    w4, 1f
+        stp     in0, in1, [x0]          /* RELAXED.  */
+        ret
+1:
+        dmb     ish                     /* Any other model: barrier first.  */
+        stp     in0, in1, [x0]
+        cmp     w4, SEQ_CST
+        beq     2f
+        ret
+2:
+        dmb     ish                     /* SEQ_CST adds a trailing barrier.  */
+        ret
+END (libat_store_16_i1)
+
+
+/* 16-byte atomic exchange.  x0 = address, in0/in1 = new value, w4 = memory
+   model.  The previous value is returned in x0/x1.  */
+ENTRY (libat_exchange_16_i1)
+        mov     x5, x0
+        cbnz    w4, 2f
+1:                                      /* RELAXED.  */
+        ldxp    res0, res1, [x5]
+        stxp    w4, in0, in1, [x5]
+        cbnz    w4, 1b
+        ret
+2:
+        cmp     w4, ACQUIRE
+        b.hi    4f
+3:                                      /* CONSUME/ACQUIRE.  */
+        ldaxp   res0, res1, [x5]
+        stxp    w4, in0, in1, [x5]
+        cbnz    w4, 3b
+        ret
+4:
+        cmp     w4, RELEASE
+        b.ne    6f
+5:                                      /* RELEASE.  */
+        ldxp    res0, res1, [x5]
+        stlxp   w4, in0, in1, [x5]
+        cbnz    w4, 5b
+        ret
+6:                                      /* ACQ_REL/SEQ_CST.  */
+        ldaxp   res0, res1, [x5]
+        stlxp   w4, in0, in1, [x5]
+        cbnz    w4, 6b
+        ret
+END (libat_exchange_16_i1)
+
+
+/* 16-byte compare-and-swap.  x0 = address, x1 = pointer to the expected
+   value, in0/in1 = desired value, w4 = memory model.  Returns 1 in x0 on
+   success; on failure stores the value found back through x1 and returns 0.  */
+ENTRY (libat_compare_exchange_16_i1)
+        ldp     exp0, exp1, [x1]
+        mov     tmp0, exp0              /* Keep a copy: CASP overwrites exp.  */
+        mov     tmp1, exp1
+        cbz     w4, 2f
+        cmp     w4, RELEASE
+        b.hs    3f
+        caspa   exp0, exp1, in0, in1, [x0]      /* CONSUME/ACQUIRE.  */
+0:
+        cmp     exp0, tmp0              /* Success iff value read == expected.  */
+        ccmp    exp1, tmp1, 0, eq
+        bne     1f
+        mov     x0, 1
+        ret
+1:
+        stp     exp0, exp1, [x1]
+        mov     x0, 0
+        ret
+2:
+        casp    exp0, exp1, in0, in1, [x0]      /* RELAXED.  */
+        b       0b
+3:
+        b.hi    4f                      /* Flags still from cmp w4, RELEASE.  */
+        caspl   exp0, exp1, in0, in1, [x0]      /* RELEASE.  */
+        b       0b
+4:
+        caspal  exp0, exp1, in0, in1, [x0]      /* ACQ_REL/SEQ_CST.  */
+        b       0b
+END (libat_compare_exchange_16_i1)
+
+
+/* Read-modify-write routines.  x0 = address, in0/in1 = operand, w4 = memory
+   model.  fetch_<op> returns the old value in x0/x1, <op>_fetch the new one.
+   RELAXED uses a plain LDXP/STXP loop; every other model uses LDAXP/STLXP.  */
+ENTRY (libat_fetch_add_16_i1)
+        mov     x5, x0
+        cbnz    w4, 2f
+1:
+        ldxp    res0, res1, [x5]
+        adds    tmplo, reslo, inlo      /* Low half first; ADC takes the carry.  */
+        adc     tmphi, reshi, inhi
+        stxp    w4, tmp0, tmp1, [x5]
+        cbnz    w4, 1b
+        ret
+2:
+        ldaxp   res0, res1, [x5]
+        adds    tmplo, reslo, inlo
+        adc     tmphi, reshi, inhi
+        stlxp   w4, tmp0, tmp1, [x5]
+        cbnz    w4, 2b
+        ret
+END (libat_fetch_add_16_i1)
+
+
+ENTRY (libat_add_fetch_16_i1)
+        mov     x5, x0
+        cbnz    w4, 2f
+1:
+        ldxp    res0, res1, [x5]
+        adds    reslo, reslo, inlo
+        adc     reshi, reshi, inhi
+        stxp    w4, res0, res1, [x5]
+        cbnz    w4, 1b
+        ret
+2:
+        ldaxp   res0, res1, [x5]
+        adds    reslo, reslo, inlo
+        adc     reshi, reshi, inhi
+        stlxp   w4, res0, res1, [x5]
+        cbnz    w4, 2b
+        ret
+END (libat_add_fetch_16_i1)
+
+
+ENTRY (libat_fetch_sub_16_i1)
+        mov     x5, x0
+        cbnz    w4, 2f
+1:
+        ldxp    res0, res1, [x5]
+        subs    tmplo, reslo, inlo      /* Low half first; SBC takes the borrow.  */
+        sbc     tmphi, reshi, inhi
+        stxp    w4, tmp0, tmp1, [x5]
+        cbnz    w4, 1b
+        ret
+2:
+        ldaxp   res0, res1, [x5]
+        subs    tmplo, reslo, inlo
+        sbc     tmphi, reshi, inhi
+        stlxp   w4, tmp0, tmp1, [x5]
+        cbnz    w4, 2b
+        ret
+END (libat_fetch_sub_16_i1)
+
+
+ENTRY (libat_sub_fetch_16_i1)
+        mov     x5, x0
+        cbnz    w4, 2f
+1:
+        ldxp    res0, res1, [x5]
+        subs    reslo, reslo, inlo
+        sbc     reshi, reshi, inhi
+        stxp    w4, res0, res1, [x5]
+        cbnz    w4, 1b
+        ret
+2:
+        ldaxp   res0, res1, [x5]
+        subs    reslo, reslo, inlo
+        sbc     reshi, reshi, inhi
+        stlxp   w4, res0, res1, [x5]
+        cbnz    w4, 2b
+        ret
+END (libat_sub_fetch_16_i1)
+
+
+ENTRY (libat_fetch_or_16_i1)
+        mov     x5, x0
+        cbnz    w4, 2f
+1:
+        ldxp    res0, res1, [x5]
+        orr     tmp0, res0, in0
+        orr     tmp1, res1, in1
+        stxp    w4, tmp0, tmp1, [x5]
+        cbnz    w4, 1b
+        ret
+2:
+        ldaxp   res0, res1, [x5]
+        orr     tmp0, res0, in0
+        orr     tmp1, res1, in1
+        stlxp   w4, tmp0, tmp1, [x5]
+        cbnz    w4, 2b
+        ret
+END (libat_fetch_or_16_i1)
+
+
+ENTRY (libat_or_fetch_16_i1)
+        mov     x5, x0
+        cbnz    w4, 2f
+1:
+        ldxp    res0, res1, [x5]
+        orr     res0, res0, in0
+        orr     res1, res1, in1
+        stxp    w4, res0, res1, [x5]
+        cbnz    w4, 1b
+        ret
+2:
+        ldaxp   res0, res1, [x5]
+        orr     res0, res0, in0
+        orr     res1, res1, in1
+        stlxp   w4, res0, res1, [x5]
+        cbnz    w4, 2b
+        ret
+END (libat_or_fetch_16_i1)
+
+
+ENTRY (libat_fetch_and_16_i1)
+        mov     x5, x0
+        cbnz    w4, 2f
+1:
+        ldxp    res0, res1, [x5]
+        and     tmp0, res0, in0
+        and     tmp1, res1, in1
+        stxp    w4, tmp0, tmp1, [x5]
+        cbnz    w4, 1b
+        ret
+2:
+        ldaxp   res0, res1, [x5]
+        and     tmp0, res0, in0
+        and     tmp1, res1, in1
+        stlxp   w4, tmp0, tmp1, [x5]
+        cbnz    w4, 2b
+        ret
+END (libat_fetch_and_16_i1)
+
+
+ENTRY (libat_and_fetch_16_i1)
+        mov     x5, x0
+        cbnz    w4, 2f
+1:
+        ldxp    res0, res1, [x5]
+        and     res0, res0, in0
+        and     res1, res1, in1
+        stxp    w4, res0, res1, [x5]
+        cbnz    w4, 1b
+        ret
+2:
+        ldaxp   res0, res1, [x5]
+        and     res0, res0, in0
+        and     res1, res1, in1
+        stlxp   w4, res0, res1, [x5]
+        cbnz    w4, 2b
+        ret
+END (libat_and_fetch_16_i1)
+
+
+ENTRY (libat_fetch_xor_16_i1)
+        mov     x5, x0
+        cbnz    w4, 2f
+1:
+        ldxp    res0, res1, [x5]
+        eor     tmp0, res0, in0
+        eor     tmp1, res1, in1
+        stxp    w4, tmp0, tmp1, [x5]
+        cbnz    w4, 1b
+        ret
+2:
+        ldaxp   res0, res1, [x5]
+        eor     tmp0, res0, in0
+        eor     tmp1, res1, in1
+        stlxp   w4, tmp0, tmp1, [x5]
+        cbnz    w4, 2b
+        ret
+END (libat_fetch_xor_16_i1)
+
+
+ENTRY (libat_xor_fetch_16_i1)
+        mov     x5, x0
+        cbnz    w4, 2f
+1:
+        ldxp    res0, res1, [x5]
+        eor     res0, res0, in0
+        eor     res1, res1, in1
+        stxp    w4, res0, res1, [x5]
+        cbnz    w4, 1b
+        ret
+2:
+        ldaxp   res0, res1, [x5]
+        eor     res0, res0, in0
+        eor     res1, res1, in1
+        stlxp   w4, res0, res1, [x5]
+        cbnz    w4, 2b
+        ret
+END (libat_xor_fetch_16_i1)
+
+
+/* NAND is computed as ~in | ~old: the operand is inverted once up front and
+   ORN supplies the second inversion inside the loop.  */
+ENTRY (libat_fetch_nand_16_i1)
+        mov     x5, x0
+        mvn     in0, in0
+        mvn     in1, in1
+        cbnz    w4, 2f
+1:
+        ldxp    res0, res1, [x5]
+        orn     tmp0, in0, res0
+        orn     tmp1, in1, res1
+        stxp    w4, tmp0, tmp1, [x5]
+        cbnz    w4, 1b
+        ret
+2:
+        ldaxp   res0, res1, [x5]
+        orn     tmp0, in0, res0
+        orn     tmp1, in1, res1
+        stlxp   w4, tmp0, tmp1, [x5]
+        cbnz    w4, 2b
+        ret
+END (libat_fetch_nand_16_i1)
+
+
+ENTRY (libat_nand_fetch_16_i1)
+        mov     x5, x0
+        mvn     in0, in0
+        mvn     in1, in1
+        cbnz    w4, 2f
+1:
+        ldxp    res0, res1, [x5]
+        orn     res0, in0, res0
+        orn     res1, in1, res1
+        stxp    w4, res0, res1, [x5]
+        cbnz    w4, 1b
+        ret
+2:
+        ldaxp   res0, res1, [x5]
+        orn     res0, in0, res0
+        orn     res1, in1, res1
+        stlxp   w4, res0, res1, [x5]
+        cbnz    w4, 2b
+        ret
+END (libat_nand_fetch_16_i1)
+
+
+/* Byte test-and-set.  x0 = address, w1 = memory model.  Stores 1 and returns
+   the previous byte in w0.  SWP stores its first register operand and loads
+   into its second, so w2 (the constant 1) must come first.  */
+ENTRY (libat_test_and_set_16_i1)
+        mov     w2, 1
+        cbnz    w1, 2f
+        swpb    w2, w0, [x0]            /* RELAXED.  */
+        ret
+
+2:      swpalb  w2, w0, [x0]            /* Any other model.  */
+        ret
+END (libat_test_and_set_16_i1)
+
+
+/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code.  */
+#define FEATURE_1_AND 0xc0000000
+#define FEATURE_1_BTI 1
+#define FEATURE_1_PAC 2
+
+/* Supported features based on the code generation options.  */
+#if defined(__ARM_FEATURE_BTI_DEFAULT)
+# define BTI_FLAG FEATURE_1_BTI
+#else
+# define BTI_FLAG 0
+#endif
+
+#if __ARM_FEATURE_PAC_DEFAULT & 3
+# define PAC_FLAG FEATURE_1_PAC
+#else
+# define PAC_FLAG 0
+#endif
+
+/* Add a NT_GNU_PROPERTY_TYPE_0 note.  */
+#define GNU_PROPERTY(type, value)       \
+  .section .note.gnu.property, "a";     \
+  .p2align 3;                           \
+  .word 4;                              \
+  .word 16;                             \
+  .word 5;                              \
+  .asciz "GNU";                         \
+  .word type;                           \
+  .word 4;                              \
+  .word value;                          \
+  .word 0;
+
+#if defined(__linux__) || defined(__FreeBSD__)
+.section .note.GNU-stack, "", %progbits
+
+/* Add GNU property note if built with branch protection.  */
+# if (BTI_FLAG|PAC_FLAG) != 0
+GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
+# endif
+#endif
diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h
index 769ba6e..d9b5ab3 100644
--- a/libatomic/config/linux/aarch64/host-config.h
+++ b/libatomic/config/linux/aarch64/host-config.h
@@ -22,14 +22,22 @@
    <http://www.gnu.org/licenses/>.  */
 
 #if HAVE_IFUNC
-#include <stdlib.h>
+#include <sys/auxv.h>
 
-# ifdef HWCAP_ATOMICS
-# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS)
+#ifdef HWCAP_USCAT
+# if N == 16
+# define IFUNC_COND_1 (hwcap & HWCAP_USCAT)
 # else
-# define IFUNC_COND_1 (false)
+# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS)
 # endif
-# define IFUNC_NCOND(N) (1)
+#else
+# define IFUNC_COND_1 (false)
+#endif
+#define IFUNC_NCOND(N) (1)
+
+#if N == 16 && IFUNC_ALT != 0
+# define DONE 1
+#endif
 
 #endif /* HAVE_IFUNC */
 