/* Out-of-line LSE atomics for AArch64 architecture.
   Copyright (C) 2019-2020 Free Software Foundation, Inc.
   Contributed by Linaro Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/*
 * The problem that we are trying to solve is operating system deployment
 * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).
 *
 * There are a number of potential solutions for this problem which have
 * been proposed and rejected for various reasons.  To recap:
 *
 * (1) Multiple builds.  The dynamic linker will examine /lib64/atomics/
 * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten.
 * However, not all Linux distributions are happy with multiple builds,
 * and anyway it has no effect on main applications.
 *
 * (2) IFUNC.  We could put these functions into libgcc_s.so, and have
 * a single copy of each function for all DSOs.  However, ARM is concerned
 * that the branch-to-indirect-branch that is implied by using a PLT,
 * as required by IFUNC, is too much overhead for smaller CPUs.
 *
 * (3) Statically predicted direct branches.  This is the approach that
 * is taken here.  These functions are linked into every DSO that uses them.
 * All of the symbols are hidden, so that the functions are called via a
 * direct branch.  The choice of LSE vs non-LSE is done via a one-byte load
 * followed by a well-predicted direct branch.  The functions are compiled
 * separately to minimize code size.
 */

/* Tell the assembler to accept LSE instructions.  */
	.arch	armv8-a+lse

/* Declare the symbol gating the LSE implementations.  */
	.hidden	__aarch64_have_lse_atomics

/* Turn size and memory model defines into mnemonic fragments.  */
#if SIZE == 1
# define S	b
# define UXT	uxtb
#elif SIZE == 2
# define S	h
# define UXT	uxth
#elif SIZE == 4 || SIZE == 8 || SIZE == 16
# define S
# define UXT	mov
#else
# error
#endif

#if MODEL == 1
# define SUFF	_relax
# define A
# define L
#elif MODEL == 2
# define SUFF	_acq
# define A	a
# define L
#elif MODEL == 3
# define SUFF	_rel
# define A
# define L	l
#elif MODEL == 4
# define SUFF	_acq_rel
# define A	a
# define L	l
#else
# error
#endif

/* Concatenate symbols.  */
#define glue2_(A, B)		A ## B
#define glue2(A, B)		glue2_(A, B)
#define glue3_(A, B, C)		A ## B ## C
#define glue3(A, B, C)		glue3_(A, B, C)
#define glue4_(A, B, C, D)	A ## B ## C ## D
#define glue4(A, B, C, D)	glue4_(A, B, C, D)

/* Select the size of a register, given a regno.  */
#define x(N)			glue2(x, N)
#define w(N)			glue2(w, N)
#if SIZE < 8
# define s(N)			w(N)
#else
# define s(N)			x(N)
#endif

#define NAME(BASE)		glue4(__aarch64_, BASE, SIZE, SUFF)
#define LDXR			glue4(ld, A, xr, S)
#define STXR			glue4(st, L, xr, S)
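/* As a worked illustration only (not additional definitions): building this
   file with SIZE == 4 and MODEL == 2 would expand the fragments roughly as

     S          -> (empty)          s(N)  -> w(N)
     SUFF       -> _acq             A -> a, L -> (empty)
     NAME(cas)  -> __aarch64_cas4_acq
     LDXR       -> ldaxr            STXR  -> stxr

   so the exported symbol carries both the operand size and the memory model
   in its name, and the fallback path pairs an acquiring exclusive load with
   a plain exclusive store.  */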
/* Temporary registers used.  Other than these, only the return value
   register (x0) and the flags are modified.  */
#define tmp0	16
#define tmp1	17
#define tmp2	15

/* Start and end a function.  */
.macro	STARTFN name
	.text
	.balign	16
	.globl	\name
	.hidden	\name
	.type	\name, %function
	.cfi_startproc
\name:
.endm

.macro	ENDFN name
	.cfi_endproc
	.size	\name, . - \name
.endm

/* Branch to LABEL if LSE is disabled.  */
.macro	JUMP_IF_NOT_LSE label
	adrp	x(tmp0), __aarch64_have_lse_atomics
	ldrb	w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
	cbz	w(tmp0), \label
.endm

#ifdef L_cas

STARTFN	NAME(cas)
	JUMP_IF_NOT_LSE	8f

#if SIZE < 16
#define CAS	glue4(cas, A, L, S)

	/* LSE fast path.  */
	CAS	s(0), s(1), [x2]
	ret

	/* Load/store-exclusive fallback.  */
8:	UXT	s(tmp0), s(0)
0:	LDXR	s(0), [x2]
	cmp	s(0), s(tmp0)
	bne	1f
	STXR	w(tmp1), s(1), [x2]
	cbnz	w(tmp1), 0b
1:	ret

#else
#define LDXP	glue3(ld, A, xp)
#define STXP	glue3(st, L, xp)
#define CASP	glue3(casp, A, L)

	/* LSE fast path.  */
	CASP	x0, x1, x2, x3, [x4]
	ret

	/* Load/store-exclusive fallback.  On success, store the new value
	   (x2, x3), not the expected value saved in the temporaries.  */
8:	mov	x(tmp0), x0
	mov	x(tmp1), x1
0:	LDXP	x0, x1, [x4]
	cmp	x0, x(tmp0)
	ccmp	x1, x(tmp1), #0, eq
	bne	1f
	STXP	w(tmp2), x2, x3, [x4]
	cbnz	w(tmp2), 0b
1:	ret

#endif

ENDFN	NAME(cas)
#endif

#ifdef L_swp
#define SWP	glue4(swp, A, L, S)

STARTFN	NAME(swp)
	JUMP_IF_NOT_LSE	8f

	/* LSE fast path.  */
	SWP	s(0), s(0), [x1]
	ret

	/* Load/store-exclusive fallback.  */
8:	mov	s(tmp0), s(0)
0:	LDXR	s(0), [x1]
	STXR	w(tmp1), s(tmp0), [x1]
	cbnz	w(tmp1), 0b
	ret

ENDFN	NAME(swp)
#endif

#if defined(L_ldadd) || defined(L_ldclr) \
    || defined(L_ldeor) || defined(L_ldset)

#ifdef L_ldadd
#define LDNM	ldadd
#define OP	add
#elif defined(L_ldclr)
#define LDNM	ldclr
#define OP	bic
#elif defined(L_ldeor)
#define LDNM	ldeor
#define OP	eor
#elif defined(L_ldset)
#define LDNM	ldset
#define OP	orr
#else
#error
#endif
#define LDOP	glue4(LDNM, A, L, S)

STARTFN	NAME(LDNM)
	JUMP_IF_NOT_LSE	8f

	/* LSE fast path.  */
	LDOP	s(0), s(0), [x1]
	ret

	/* Load/store-exclusive fallback.  */
8:	mov	s(tmp0), s(0)
0:	LDXR	s(0), [x1]
	OP	s(tmp1), s(0), s(tmp0)
	STXR	w(tmp2), s(tmp1), [x1]
	cbnz	w(tmp2), 0b
	ret

ENDFN	NAME(LDNM)
#endif
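
/* For reference only (not part of this file's build): with GCC's
   -moutline-atomics, a plain __atomic builtin is compiled into a call to
   one of the helpers defined above, selected by operand size and memory
   model.  A minimal caller-side sketch, assuming a 32-bit operand and
   acquire ordering:

       #include <stdint.h>

       uint32_t
       fetch_add_acquire (uint32_t *p, uint32_t v)
       {
         // Expected to become a call such as "bl __aarch64_ldadd4_acq"
         // when built with -moutline-atomics: the value arrives in w0,
         // the pointer in x1, and the previous value is returned in w0.
         return __atomic_fetch_add (p, v, __ATOMIC_ACQUIRE);
       }
*/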