/* Copyright (C) 2022-2024 Free Software Foundation, Inc.
This file is part of the GNU Atomic Library (libatomic).
Libatomic is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Libatomic is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* AArch64 128-bit lock-free atomic implementation.
128-bit atomics are now lock-free for all AArch64 architecture versions.
This is backwards compatible with existing binaries (as we swap all uses
of 128-bit atomics via an ifunc) and gives better performance than locking
atomics.
128-bit atomic loads use an exclusive loop if LSE2 is not supported.
This results in an implicit store which is invisible to software as long
as the given address is writeable. Since all other atomics have explicit
writes, this will be true when using atomics in actual code.
The libat_<op>_16 entry points are ARMv8.0.
The libat_<op>_16_i1 entry points are used when LSE128 is available.
The libat_<op>_16_i2 entry points are used when LSE2 is available. */
/* Select the architecture the assembler accepts.  The LSE128 instruction
   set is only enabled when the build defines HAVE_FEAT_LSE128 non-zero;
   otherwise the file is assembled for ARMv8-A plus LSE.  */
#if HAVE_FEAT_LSE128
.arch armv9-a+lse128
#else
.arch armv8-a+lse
#endif
/* Variant name helpers: LSE128 appends _i1, LSE2 appends _i2 and CORE
   leaves the name unchanged.  */
#define LSE128(NAME) NAME##_i1
#define LSE2(NAME) NAME##_i2
#define CORE(NAME) NAME
/* ENTRY_FEAT (NAME, FEAT): open the FEAT variant of function NAME.  */
#define ENTRY_FEAT(NAME, FEAT) \
ENTRY (FEAT (NAME))
/* ENTRY (NAME): declare NAME as a global, hidden function symbol, align it
   to 16 bytes, define the label, start CFI and emit "hint 34" (bti c) as
   the first instruction.  */
#define ENTRY(NAME) \
.global NAME; \
.hidden NAME; \
.type NAME,%function; \
.p2align 4; \
NAME: \
.cfi_startproc; \
hint 34 // bti c
/* END_FEAT (NAME, FEAT): close the FEAT variant of function NAME.  */
#define END_FEAT(NAME, FEAT) \
END (FEAT (NAME))
/* END (NAME): end CFI and record the size of NAME.  */
#define END(NAME) \
.cfi_endproc; \
.size NAME, .-NAME;
/* ALIAS (NAME, FROM, TO): make the FROM variant of NAME a global symbol
   with the same value as the TO variant of NAME.  */
#define ALIAS(NAME, FROM, TO) \
ALIAS1 (FROM (NAME),TO (NAME))
#define ALIAS1(ALIAS, NAME) \
.global ALIAS; \
.set ALIAS, NAME;
/* Register roles used by the entry points below:
   res0/res1 - 128-bit result, in register-number order.
   in0/in1   - 128-bit input operand, in register-number order.
   tmp0/tmp1 - scratch pair.
   exp0/exp1 - expected value for compare-exchange.  */
#define res0 x0
#define res1 x1
#define in0 x2
#define in1 x3
#define tmp0 x6
#define tmp1 x7
#define exp0 x8
#define exp1 x9
/* The *lo / *hi names select the low and high 64-bit halves of the same
   register pairs.  They are swapped for big-endian (__AARCH64EB__) so that
   the adds/adc and subs/sbc sequences carry from the low half into the
   high half on either endianness.  */
#ifdef __AARCH64EB__
# define reslo x1
# define reshi x0
# define inlo x3
# define inhi x2
# define tmplo x7
# define tmphi x6
#else
# define reslo x0
# define reshi x1
# define inlo x2
# define inhi x3
# define tmplo x6
# define tmphi x7
#endif
/* Numeric values of the memory-order argument as tested by the entry
   points below.  */
#define RELAXED 0
#define CONSUME 1
#define ACQUIRE 2
#define RELEASE 3
#define ACQ_REL 4
#define SEQ_CST 5
/* Baseline 128-bit atomic load using an exclusive load/store loop.
   In:  x0 = address, w1 = memory order.
   Out: res0/res1 (x0/x1) = value read.
   x5 holds a copy of the address because x0 is overwritten by the result;
   w4 receives the store-exclusive status.  The value read is written back
   unchanged and the loop retries until that store succeeds.  */
ENTRY (libat_load_16)
mov x5, x0
cbnz w1, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/SEQ_CST: same loop with a load-acquire exclusive.  */
2: ldaxp res0, res1, [x5]
stxp w4, res0, res1, [x5]
cbnz w4, 2b
ret
END (libat_load_16)
/* LSE2 variant of the 128-bit atomic load: a plain ldp replaces the
   exclusive loop of the baseline version, so nothing is written to memory.
   In:  x0 = address, w1 = memory order.
   Out: res0/res1 (x0/x1) = value read.
   tmp0 is used as a discarded destination on the SEQ_CST path.  */
ENTRY_FEAT (libat_load_16, LSE2)
cbnz w1, 1f
/* RELAXED. */
ldp res0, res1, [x0]
ret
1:
cmp w1, SEQ_CST
b.eq 2f
/* ACQUIRE/CONSUME (Load-AcquirePC semantics). */
ldp res0, res1, [x0]
dmb ishld
ret
/* SEQ_CST. */
2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */
ldp res0, res1, [x0]
dmb ishld
ret
END_FEAT (libat_load_16, LSE2)
/* Baseline 128-bit atomic store using an exclusive load/store loop.
   In:  x0 = address, in0/in1 (x2/x3) = value to store, w4 = memory order.
   w4 is reused as the store-exclusive status once the order has been
   tested.  The ldxp result (xzr, tmp0) is never read; the load is only
   there to pair with the following store-exclusive.  */
ENTRY (libat_store_16)
cbnz w4, 2f
/* RELAXED. */
1: ldxp xzr, tmp0, [x0]
stxp w4, in0, in1, [x0]
cbnz w4, 1b
ret
/* RELEASE/SEQ_CST: same loop with a store-release exclusive.  */
2: ldxp xzr, tmp0, [x0]
stlxp w4, in0, in1, [x0]
cbnz w4, 2b
ret
END (libat_store_16)
/* LSE2 variant of the 128-bit atomic store.
   In:  x0 = address, in0/in1 (x2/x3) = value to store, w4 = memory order.
   The relaxed case is a plain stp; all other orders use the same
   ldxp/stlxp loop as the baseline version, with w4 reused as the
   store-exclusive status.  */
ENTRY_FEAT (libat_store_16, LSE2)
cbnz w4, 1f
/* RELAXED. */
stp in0, in1, [x0]
ret
/* RELEASE/SEQ_CST. */
1: ldxp xzr, tmp0, [x0]
stlxp w4, in0, in1, [x0]
cbnz w4, 1b
ret
END_FEAT (libat_store_16, LSE2)
/* Baseline 128-bit atomic exchange.
   In:  x0 = address, in0/in1 (x2/x3) = new value, w4 = memory order.
   Out: res0/res1 (x0/x1) = value read before the store.
   x5 holds a copy of the address because x0 is overwritten by the result;
   w4 is reused as the store-exclusive status.  */
ENTRY (libat_exchange_16)
mov x5, x0
cbnz w4, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
stxp w4, in0, in1, [x5]
cbnz w4, 1b
ret
2:
/* Orders above ACQUIRE (RELEASE, ACQ_REL, SEQ_CST) go to label 4.  */
cmp w4, ACQUIRE
b.hi 4f
/* ACQUIRE/CONSUME. */
3: ldaxp res0, res1, [x5]
stxp w4, in0, in1, [x5]
cbnz w4, 3b
ret
/* RELEASE/ACQ_REL/SEQ_CST. */
4: ldaxp res0, res1, [x5]
stlxp w4, in0, in1, [x5]
cbnz w4, 4b
ret
END (libat_exchange_16)
#if HAVE_FEAT_LSE128
/* LSE128 variant of the 128-bit atomic exchange, built only when
   HAVE_FEAT_LSE128 is non-zero.
   In:  x0 = address, in0/in1 (x2/x3) = new value, w4 = memory order.
   Out: res0/res1 (x0/x1) as left by the swpp instruction.
   The address is moved to tmp0 and the new value into res0/res1 so that
   the data registers of swpp are also the result registers.  */
ENTRY_FEAT (libat_exchange_16, LSE128)
mov tmp0, x0
mov res0, in0
mov res1, in1
cbnz w4, 1f
/* RELAXED. */
swpp res0, res1, [tmp0]
ret
1:
cmp w4, ACQUIRE
b.hi 2f
/* ACQUIRE/CONSUME. */
swppa res0, res1, [tmp0]
ret
/* RELEASE/ACQ_REL/SEQ_CST. */
2: swppal res0, res1, [tmp0]
ret
END_FEAT (libat_exchange_16, LSE128)
#endif
/* Baseline 128-bit atomic compare-and-exchange.
   In:  x0 = address, x1 = address of the expected value,
        in0/in1 (x2/x3) = desired value, w4 = memory order (the only
        order argument this routine reads).
   Out: x0 = 1 if memory matched the expected value, else 0.  On a
        mismatch the value read from memory is stored to [x1].
   Each loop stores back either the desired value (match) or the value
   just read (mismatch), so the exclusive store is always performed.
   The flags set by cmp/ccmp are still live after the loop: the
   instructions in between (csel, stxp, cbnz) do not write them.  */
ENTRY (libat_compare_exchange_16)
ldp exp0, exp1, [x1]
cbz w4, 3f
cmp w4, RELEASE
b.hs 5f
/* ACQUIRE/CONSUME. */
1: ldaxp tmp0, tmp1, [x0]
cmp tmp0, exp0
/* eq only if both halves match; otherwise ccmp forces the flags to ne.  */
ccmp tmp1, exp1, 0, eq
csel tmp0, in0, tmp0, eq
csel tmp1, in1, tmp1, eq
stxp w4, tmp0, tmp1, [x0]
cbnz w4, 1b
beq 2f
/* Mismatch: report the observed value through the expected pointer.  */
stp tmp0, tmp1, [x1]
2: cset x0, eq
ret
/* RELAXED. */
3: ldxp tmp0, tmp1, [x0]
cmp tmp0, exp0
ccmp tmp1, exp1, 0, eq
csel tmp0, in0, tmp0, eq
csel tmp1, in1, tmp1, eq
stxp w4, tmp0, tmp1, [x0]
cbnz w4, 3b
beq 4f
stp tmp0, tmp1, [x1]
4: cset x0, eq
ret
/* RELEASE/ACQ_REL/SEQ_CST. */
5: ldaxp tmp0, tmp1, [x0]
cmp tmp0, exp0
ccmp tmp1, exp1, 0, eq
csel tmp0, in0, tmp0, eq
csel tmp1, in1, tmp1, eq
stlxp w4, tmp0, tmp1, [x0]
cbnz w4, 5b
beq 6f
stp tmp0, tmp1, [x1]
6: cset x0, eq
ret
END (libat_compare_exchange_16)
/* LSE2 variant of the 128-bit atomic compare-and-exchange, using the casp
   family of instructions instead of an exclusive loop.
   In:  x0 = address, x1 = address of the expected value,
        in0/in1 (x2/x3) = desired value, w4 = memory order (the only
        order argument this routine reads).
   Out: x0 = 1 if exp0/exp1 after the casp still equal the expected value
        saved in tmp0/tmp1, else 0.  On a mismatch exp0/exp1 are stored
        to [x1].
   All four casp variants share the result check at label 0.  */
ENTRY_FEAT (libat_compare_exchange_16, LSE2)
ldp exp0, exp1, [x1]
/* Keep a copy of the expected value; the casp writes exp0/exp1.  */
mov tmp0, exp0
mov tmp1, exp1
cbz w4, 2f
cmp w4, RELEASE
b.hs 3f
/* ACQUIRE/CONSUME. */
caspa exp0, exp1, in0, in1, [x0]
0:
cmp exp0, tmp0
ccmp exp1, tmp1, 0, eq
bne 1f
mov x0, 1
ret
1:
stp exp0, exp1, [x1]
mov x0, 0
ret
/* RELAXED. */
2: casp exp0, exp1, in0, in1, [x0]
b 0b
/* RELEASE.  The flags still hold the result of "cmp w4, RELEASE" above,
   so b.hi separates RELEASE from ACQ_REL/SEQ_CST.  */
3: b.hi 4f
caspl exp0, exp1, in0, in1, [x0]
b 0b
/* ACQ_REL/SEQ_CST. */
4: caspal exp0, exp1, in0, in1, [x0]
b 0b
END_FEAT (libat_compare_exchange_16, LSE2)
/* 128-bit atomic add returning the previous value.
   In:  x0 = address, in0/in1 (x2/x3) = addend, w4 = memory order.
   Out: res0/res1 (x0/x1) = value read before the addition.
   x5 holds a copy of the address; the sum is built in tmp0/tmp1 so the
   value read stays intact in res0/res1.  The lo/hi register names make
   the carry flow from the low half to the high half on both endiannesses.
   w4 is reused as the store-exclusive status.  */
ENTRY (libat_fetch_add_16)
mov x5, x0
cbnz w4, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
adds tmplo, reslo, inlo
adc tmphi, reshi, inhi
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: ldaxp res0, res1, [x5]
adds tmplo, reslo, inlo
adc tmphi, reshi, inhi
stlxp w4, tmp0, tmp1, [x5]
cbnz w4, 2b
ret
END (libat_fetch_add_16)
/* 128-bit atomic add returning the new value.
   In:  x0 = address, in0/in1 (x2/x3) = addend, w4 = memory order.
   Out: res0/res1 (x0/x1) = sum that was stored.
   x5 holds a copy of the address; the sum is computed in place in
   res0/res1, which are both stored and returned.  w4 is reused as the
   store-exclusive status.  */
ENTRY (libat_add_fetch_16)
mov x5, x0
cbnz w4, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
adds reslo, reslo, inlo
adc reshi, reshi, inhi
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: ldaxp res0, res1, [x5]
adds reslo, reslo, inlo
adc reshi, reshi, inhi
stlxp w4, res0, res1, [x5]
cbnz w4, 2b
ret
END (libat_add_fetch_16)
/* 128-bit atomic subtract returning the previous value.
   In:  x0 = address, in0/in1 (x2/x3) = subtrahend, w4 = memory order.
   Out: res0/res1 (x0/x1) = value read before the subtraction.
   x5 holds a copy of the address; the difference is built in tmp0/tmp1
   so the value read stays intact in res0/res1.  subs/sbc propagate the
   borrow from the low half to the high half.  w4 is reused as the
   store-exclusive status.  */
ENTRY (libat_fetch_sub_16)
mov x5, x0
cbnz w4, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
subs tmplo, reslo, inlo
sbc tmphi, reshi, inhi
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: ldaxp res0, res1, [x5]
subs tmplo, reslo, inlo
sbc tmphi, reshi, inhi
stlxp w4, tmp0, tmp1, [x5]
cbnz w4, 2b
ret
END (libat_fetch_sub_16)
/* 128-bit atomic subtract returning the new value.
   In:  x0 = address, in0/in1 (x2/x3) = subtrahend, w4 = memory order.
   Out: res0/res1 (x0/x1) = difference that was stored.
   x5 holds a copy of the address; the difference is computed in place in
   res0/res1, which are both stored and returned.  w4 is reused as the
   store-exclusive status.  */
ENTRY (libat_sub_fetch_16)
mov x5, x0
cbnz w4, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
subs reslo, reslo, inlo
sbc reshi, reshi, inhi
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: ldaxp res0, res1, [x5]
subs reslo, reslo, inlo
sbc reshi, reshi, inhi
stlxp w4, res0, res1, [x5]
cbnz w4, 2b
ret
END (libat_sub_fetch_16)
/* 128-bit atomic OR returning the previous value.
   In:  x0 = address, in0/in1 (x2/x3) = operand, w4 = memory order.
   Out: res0/res1 (x0/x1) = value read before the OR.
   x5 holds a copy of the address; the new value is built in tmp0/tmp1 so
   the value read stays intact in res0/res1.  w4 is reused as the
   store-exclusive status.  */
ENTRY (libat_fetch_or_16)
mov x5, x0
cbnz w4, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
orr tmp0, res0, in0
orr tmp1, res1, in1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: ldaxp res0, res1, [x5]
orr tmp0, res0, in0
orr tmp1, res1, in1
stlxp w4, tmp0, tmp1, [x5]
cbnz w4, 2b
ret
END (libat_fetch_or_16)
#if HAVE_FEAT_LSE128
/* LSE128 variant of the 128-bit atomic fetch-OR, built only when
   HAVE_FEAT_LSE128 is non-zero.
   In:  x0 = address, in0/in1 (x2/x3) = operand, w4 = memory order.
   Out: res0/res1 (x0/x1) as left by the ldsetp instruction.
   The address is moved to tmp0 and the operand into res0/res1 so that the
   data registers of ldsetp are also the result registers.  */
ENTRY_FEAT (libat_fetch_or_16, LSE128)
mov tmp0, x0
mov res0, in0
mov res1, in1
cbnz w4, 1f
/* RELAXED. */
ldsetp res0, res1, [tmp0]
ret
1:
cmp w4, ACQUIRE
b.hi 2f
/* ACQUIRE/CONSUME. */
ldsetpa res0, res1, [tmp0]
ret
/* RELEASE/ACQ_REL/SEQ_CST. */
2: ldsetpal res0, res1, [tmp0]
ret
END_FEAT (libat_fetch_or_16, LSE128)
#endif
/* 128-bit atomic OR returning the new value.
   In:  x0 = address, in0/in1 (x2/x3) = operand, w4 = memory order.
   Out: res0/res1 (x0/x1) = value that was stored.
   x5 holds a copy of the address; the OR is computed in place in
   res0/res1, which are both stored and returned.  w4 is reused as the
   store-exclusive status.  */
ENTRY (libat_or_fetch_16)
mov x5, x0
cbnz w4, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
orr res0, res0, in0
orr res1, res1, in1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: ldaxp res0, res1, [x5]
orr res0, res0, in0
orr res1, res1, in1
stlxp w4, res0, res1, [x5]
cbnz w4, 2b
ret
END (libat_or_fetch_16)
#if HAVE_FEAT_LSE128
/* LSE128 variant of the 128-bit atomic OR returning the new value, built
   only when HAVE_FEAT_LSE128 is non-zero.
   In:  x0 = address, in0/in1 (x2/x3) = operand, w4 = memory order.
   Out: res0/res1 (x0/x1) = previous memory value ORed with the operand.
   ldsetp overwrites its data registers (in0/in1) with the previous
   memory value, so the operand is saved in tmp0/tmp1 and ORed back in
   afterwards.  The copies must be made before the memory-order dispatch
   because every path, not only the relaxed one, reads tmp0/tmp1.  */
ENTRY_FEAT (libat_or_fetch_16, LSE128)
mov tmp0, in0
mov tmp1, in1
cbnz w4, 1f
/* RELAXED. */
ldsetp in0, in1, [x0]
orr res0, in0, tmp0
orr res1, in1, tmp1
ret
1:
cmp w4, ACQUIRE
b.hi 2f
/* ACQUIRE/CONSUME. */
ldsetpa in0, in1, [x0]
orr res0, in0, tmp0
orr res1, in1, tmp1
ret
/* RELEASE/ACQ_REL/SEQ_CST. */
2: ldsetpal in0, in1, [x0]
orr res0, in0, tmp0
orr res1, in1, tmp1
ret
END_FEAT (libat_or_fetch_16, LSE128)
#endif
/* 128-bit atomic AND returning the previous value.
   In:  x0 = address, in0/in1 (x2/x3) = operand, w4 = memory order.
   Out: res0/res1 (x0/x1) = value read before the AND.
   x5 holds a copy of the address; the new value is built in tmp0/tmp1 so
   the value read stays intact in res0/res1.  w4 is reused as the
   store-exclusive status.  */
ENTRY (libat_fetch_and_16)
mov x5, x0
cbnz w4, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
and tmp0, res0, in0
and tmp1, res1, in1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: ldaxp res0, res1, [x5]
and tmp0, res0, in0
and tmp1, res1, in1
stlxp w4, tmp0, tmp1, [x5]
cbnz w4, 2b
ret
END (libat_fetch_and_16)
#if HAVE_FEAT_LSE128
/* LSE128 variant of the 128-bit atomic fetch-AND, built only when
   HAVE_FEAT_LSE128 is non-zero.
   In:  x0 = address, in0/in1 (x2/x3) = operand, w4 = memory order.
   Out: res0/res1 (x0/x1) as left by the ldclrp instruction.
   The address is moved to tmp0 and the bitwise complement of the operand
   is placed in res0/res1, which ldclrp uses as its data registers.  */
ENTRY_FEAT (libat_fetch_and_16, LSE128)
mov tmp0, x0
mvn res0, in0
mvn res1, in1
cbnz w4, 1f
/* RELAXED. */
ldclrp res0, res1, [tmp0]
ret
1:
cmp w4, ACQUIRE
b.hi 2f
/* ACQUIRE/CONSUME. */
ldclrpa res0, res1, [tmp0]
ret
/* RELEASE/ACQ_REL/SEQ_CST. */
2: ldclrpal res0, res1, [tmp0]
ret
END_FEAT (libat_fetch_and_16, LSE128)
#endif
/* 128-bit atomic AND returning the new value.
   In:  x0 = address, in0/in1 (x2/x3) = operand, w4 = memory order.
   Out: res0/res1 (x0/x1) = value that was stored.
   x5 holds a copy of the address; the AND is computed in place in
   res0/res1, which are both stored and returned.  w4 is reused as the
   store-exclusive status.  */
ENTRY (libat_and_fetch_16)
mov x5, x0
cbnz w4, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
and res0, res0, in0
and res1, res1, in1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: ldaxp res0, res1, [x5]
and res0, res0, in0
and res1, res1, in1
stlxp w4, res0, res1, [x5]
cbnz w4, 2b
ret
END (libat_and_fetch_16)
#if HAVE_FEAT_LSE128
/* LSE128 variant of the 128-bit atomic AND returning the new value, built
   only when HAVE_FEAT_LSE128 is non-zero.
   In:  x0 = address, in0/in1 (x2/x3) = operand, w4 = memory order.
   Out: res0/res1 (x0/x1) = previous memory value ANDed with the operand.
   ldclrp clears the bits set in its data registers, so it is given the
   complement of the operand in tmp0/tmp1; it then overwrites tmp0/tmp1
   with the previous memory value, which is ANDed with the untouched
   operand to form the result.  Both halves of the complement must be
   set up (tmp0 and tmp1), and every path addresses memory through x0,
   the only register holding the address in this routine.  */
ENTRY_FEAT (libat_and_fetch_16, LSE128)
mvn tmp0, in0
mvn tmp1, in1
cbnz w4, 1f
/* RELAXED. */
ldclrp tmp0, tmp1, [x0]
and res0, tmp0, in0
and res1, tmp1, in1
ret
1:
cmp w4, ACQUIRE
b.hi 2f
/* ACQUIRE/CONSUME. */
ldclrpa tmp0, tmp1, [x0]
and res0, tmp0, in0
and res1, tmp1, in1
ret
/* RELEASE/ACQ_REL/SEQ_CST. */
2: ldclrpal tmp0, tmp1, [x0]
and res0, tmp0, in0
and res1, tmp1, in1
ret
END_FEAT (libat_and_fetch_16, LSE128)
#endif
/* 128-bit atomic XOR returning the previous value.
   In:  x0 = address, in0/in1 (x2/x3) = operand, w4 = memory order.
   Out: res0/res1 (x0/x1) = value read before the XOR.
   x5 holds a copy of the address; the new value is built in tmp0/tmp1 so
   the value read stays intact in res0/res1.  w4 is reused as the
   store-exclusive status.  */
ENTRY (libat_fetch_xor_16)
mov x5, x0
cbnz w4, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
eor tmp0, res0, in0
eor tmp1, res1, in1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: ldaxp res0, res1, [x5]
eor tmp0, res0, in0
eor tmp1, res1, in1
stlxp w4, tmp0, tmp1, [x5]
cbnz w4, 2b
ret
END (libat_fetch_xor_16)
/* 128-bit atomic XOR returning the new value.
   In:  x0 = address, in0/in1 (x2/x3) = operand, w4 = memory order.
   Out: res0/res1 (x0/x1) = value that was stored.
   x5 holds a copy of the address; the XOR is computed in place in
   res0/res1, which are both stored and returned.  w4 is reused as the
   store-exclusive status.  */
ENTRY (libat_xor_fetch_16)
mov x5, x0
cbnz w4, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
eor res0, res0, in0
eor res1, res1, in1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: ldaxp res0, res1, [x5]
eor res0, res0, in0
eor res1, res1, in1
stlxp w4, res0, res1, [x5]
cbnz w4, 2b
ret
END (libat_xor_fetch_16)
/* 128-bit atomic NAND returning the previous value.
   In:  x0 = address, in0/in1 (x2/x3) = operand, w4 = memory order.
   Out: res0/res1 (x0/x1) = value read before the NAND.
   The operand is complemented once up front; inside the loop
   "orn tmp, in, res" computes ~operand | ~value, i.e. ~(value & operand).
   x5 holds a copy of the address and w4 is reused as the store-exclusive
   status.  */
ENTRY (libat_fetch_nand_16)
mov x5, x0
mvn in0, in0
mvn in1, in1
cbnz w4, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
orn tmp0, in0, res0
orn tmp1, in1, res1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: ldaxp res0, res1, [x5]
orn tmp0, in0, res0
orn tmp1, in1, res1
stlxp w4, tmp0, tmp1, [x5]
cbnz w4, 2b
ret
END (libat_fetch_nand_16)
/* 128-bit atomic NAND returning the new value.
   In:  x0 = address, in0/in1 (x2/x3) = operand, w4 = memory order.
   Out: res0/res1 (x0/x1) = value that was stored.
   The operand is complemented once up front; inside the loop
   "orn res, in, res" computes ~operand | ~value, i.e. ~(value & operand),
   in place.  x5 holds a copy of the address and w4 is reused as the
   store-exclusive status.  */
ENTRY (libat_nand_fetch_16)
mov x5, x0
mvn in0, in0
mvn in1, in1
cbnz w4, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
orn res0, in0, res0
orn res1, in1, res1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: ldaxp res0, res1, [x5]
orn res0, in0, res0
orn res1, in1, res1
stlxp w4, res0, res1, [x5]
cbnz w4, 2b
ret
END (libat_nand_fetch_16)
/* __atomic_test_and_set is always inlined, so this entry is unused and
only required for completeness.
In:  x0 = address.  No memory-order argument is examined; the same
     ldaxrb/stlxrb loop is used for every order.
Out: w0 = byte read from the address before the store.
x5 holds a copy of the address and w4 the store-exclusive status.
NOTE(review): the byte stored comes from w2, which this routine never
sets itself - confirm what callers are expected to pass there.  */
ENTRY (libat_test_and_set_16)
/* RELAXED/ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
mov x5, x0
1: ldaxrb w0, [x5]
stlxrb w4, w2, [x5]
cbnz w4, 1b
ret
END (libat_test_and_set_16)
/* Alias entry points which are the same in LSE2 and LSE128.
   Each ALIAS (name, LSE128, LSE2) defines name_i1 as name_i2.  */
/* Without LSE128 support in the build, the operations that would have a
   dedicated LSE128 implementation fall back to their LSE2 entry points.  */
#if !HAVE_FEAT_LSE128
ALIAS (libat_exchange_16, LSE128, LSE2)
ALIAS (libat_fetch_or_16, LSE128, LSE2)
ALIAS (libat_fetch_and_16, LSE128, LSE2)
ALIAS (libat_or_fetch_16, LSE128, LSE2)
ALIAS (libat_and_fetch_16, LSE128, LSE2)
#endif
/* Operations with no LSE128-specific implementation in this file.  */
ALIAS (libat_load_16, LSE128, LSE2)
ALIAS (libat_store_16, LSE128, LSE2)
ALIAS (libat_compare_exchange_16, LSE128, LSE2)
ALIAS (libat_fetch_add_16, LSE128, LSE2)
ALIAS (libat_add_fetch_16, LSE128, LSE2)
ALIAS (libat_fetch_sub_16, LSE128, LSE2)
ALIAS (libat_sub_fetch_16, LSE128, LSE2)
ALIAS (libat_fetch_xor_16, LSE128, LSE2)
ALIAS (libat_xor_fetch_16, LSE128, LSE2)
ALIAS (libat_fetch_nand_16, LSE128, LSE2)
ALIAS (libat_nand_fetch_16, LSE128, LSE2)
ALIAS (libat_test_and_set_16, LSE128, LSE2)
/* Alias entry points which are the same in baseline and LSE2.
   Each ALIAS (name, LSE2, CORE) defines name_i2 as name.  load, store and
   compare_exchange are absent because they have their own LSE2 code.  */
ALIAS (libat_exchange_16, LSE2, CORE)
ALIAS (libat_fetch_add_16, LSE2, CORE)
ALIAS (libat_add_fetch_16, LSE2, CORE)
ALIAS (libat_fetch_sub_16, LSE2, CORE)
ALIAS (libat_sub_fetch_16, LSE2, CORE)
ALIAS (libat_fetch_or_16, LSE2, CORE)
ALIAS (libat_or_fetch_16, LSE2, CORE)
ALIAS (libat_fetch_and_16, LSE2, CORE)
ALIAS (libat_and_fetch_16, LSE2, CORE)
ALIAS (libat_fetch_xor_16, LSE2, CORE)
ALIAS (libat_xor_fetch_16, LSE2, CORE)
ALIAS (libat_fetch_nand_16, LSE2, CORE)
ALIAS (libat_nand_fetch_16, LSE2, CORE)
ALIAS (libat_test_and_set_16, LSE2, CORE)
/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code. */
#define FEATURE_1_AND 0xc0000000
#define FEATURE_1_BTI 1
#define FEATURE_1_PAC 2
/* Supported features based on the code generation options. */
/* BTI_FLAG is set only when the compiler defines
   __ARM_FEATURE_BTI_DEFAULT.  */
#if defined(__ARM_FEATURE_BTI_DEFAULT)
# define BTI_FLAG FEATURE_1_BTI
#else
# define BTI_FLAG 0
#endif
/* PAC_FLAG is set only when either of the two low bits of
   __ARM_FEATURE_PAC_DEFAULT is set.  */
#if __ARM_FEATURE_PAC_DEFAULT & 3
# define PAC_FLAG FEATURE_1_PAC
#else
# define PAC_FLAG 0
#endif
/* Add a NT_GNU_PROPERTY_TYPE_0 note.
   Emits, in section .note.gnu.property with 8-byte alignment: the words
   4 (the size of the "GNU" string including its terminator), 16 (the size
   of the four words that follow the string) and 5, then the string "GNU",
   then one property made of TYPE, the size 4, VALUE and a zero pad word.  */
#define GNU_PROPERTY(type, value) \
.section .note.gnu.property, "a"; \
.p2align 3; \
.word 4; \
.word 16; \
.word 5; \
.asciz "GNU"; \
.word type; \
.word 4; \
.word value; \
.word 0;
/* On Linux and FreeBSD emit an empty .note.GNU-stack section, and the
   property note when at least one of BTI_FLAG/PAC_FLAG is non-zero.  */
#if defined(__linux__) || defined(__FreeBSD__)
.section .note.GNU-stack, "", %progbits
/* Add GNU property note if built with branch protection. */
# if (BTI_FLAG|PAC_FLAG) != 0
GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
# endif
#endif