aboutsummaryrefslogtreecommitdiff
path: root/libatomic
diff options
context:
space:
mode:
author Wilco Dijkstra <wilco.dijkstra@arm.com> 2022-11-15 14:38:55 +0000
committer Wilco Dijkstra <wilco.dijkstra@arm.com> 2022-11-15 15:08:33 +0000
commit d1288d850944f69a795e4ff444a427eba3fec11b (patch)
tree 0357dd0e95cb2e1825217396f7ff76fe6b1a377b /libatomic
parent 5925f0ec54ab5ed773935eec09a602f58fa0ca2c (diff)
downloadgcc-d1288d850944f69a795e4ff444a427eba3fec11b.zip
gcc-d1288d850944f69a795e4ff444a427eba3fec11b.tar.gz
gcc-d1288d850944f69a795e4ff444a427eba3fec11b.tar.bz2
libatomic: Add support for LSE and LSE2
Add support for AArch64 LSE and LSE2 to libatomic. Disable outline atomics, and use LSE ifuncs for 1-8 byte atomics and LSE2 ifuncs for 16-byte atomics. On Neoverse V1, 16-byte atomics are ~4x faster due to avoiding locks. Note this is safe since we swap all 16-byte atomics using the same ifunc, so they either use locks or LSE2 atomics, but never a mix. This also improves ABI compatibility with LLVM: its inlined 16-byte atomics are compatible with the new libatomic if LSE2 is supported. libatomic/ * Makefile.in: Regenerated with automake 1.15.1. * Makefile.am: Add atomic_16.S for AArch64. * configure.tgt: Disable outline atomics in AArch64 build. * config/linux/aarch64/atomic_16.S: New file - implementation of ifuncs for 16-byte atomics. * config/linux/aarch64/host-config.h: Enable ifuncs, use LSE (HWCAP_ATOMICS) for 1-8-byte atomics and LSE2 (HWCAP_USCAT) for 16-byte atomics.
Diffstat (limited to 'libatomic')
-rw-r--r-- libatomic/Makefile.am 2
-rw-r--r-- libatomic/Makefile.in 62
-rw-r--r-- libatomic/config/linux/aarch64/atomic_16.S 462
-rw-r--r-- libatomic/config/linux/aarch64/host-config.h 18
-rw-r--r-- libatomic/configure.tgt 1
5 files changed, 527 insertions, 18 deletions
diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am
index d88515e..41e5da2 100644
--- a/libatomic/Makefile.am
+++ b/libatomic/Makefile.am
@@ -127,6 +127,8 @@ if HAVE_IFUNC
if ARCH_AARCH64_LINUX
IFUNC_OPTIONS = -march=armv8-a+lse
libatomic_la_LIBADD += $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS)))
+libatomic_la_SOURCES += atomic_16.S
+
endif
if ARCH_ARM_LINUX
IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64
diff --git a/libatomic/Makefile.in b/libatomic/Makefile.in
index 80d2565..89e29fc 100644
--- a/libatomic/Makefile.in
+++ b/libatomic/Makefile.in
@@ -90,13 +90,14 @@ build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_1 = $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS)))
-@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = $(foreach \
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = atomic_16.S
+@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(foreach \
@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ s,$(SIZES),$(addsuffix \
@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _$(s)_1_.lo,$(SIZEOBJS))) \
@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix \
@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _8_2_.lo,$(SIZEOBJS))
-@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
+@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_5 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix _16_2_.lo,$(SIZEOBJS))
subdir = .
@@ -154,8 +155,11 @@ am__uninstall_files_from_dir = { \
}
am__installdirs = "$(DESTDIR)$(toolexeclibdir)"
LTLIBRARIES = $(noinst_LTLIBRARIES) $(toolexeclib_LTLIBRARIES)
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__objects_1 = \
+@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@ atomic_16.lo
am_libatomic_la_OBJECTS = gload.lo gstore.lo gcas.lo gexch.lo \
- glfree.lo lock.lo init.lo fenv.lo fence.lo flag.lo
+ glfree.lo lock.lo init.lo fenv.lo fence.lo flag.lo \
+ $(am__objects_1)
libatomic_la_OBJECTS = $(am_libatomic_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
@@ -165,9 +169,9 @@ libatomic_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(libatomic_la_LDFLAGS) $(LDFLAGS) -o $@
libatomic_convenience_la_DEPENDENCIES = $(libatomic_la_LIBADD)
-am__objects_1 = gload.lo gstore.lo gcas.lo gexch.lo glfree.lo lock.lo \
- init.lo fenv.lo fence.lo flag.lo
-am_libatomic_convenience_la_OBJECTS = $(am__objects_1)
+am__objects_2 = gload.lo gstore.lo gcas.lo gexch.lo glfree.lo lock.lo \
+ init.lo fenv.lo fence.lo flag.lo $(am__objects_1)
+am_libatomic_convenience_la_OBJECTS = $(am__objects_2)
libatomic_convenience_la_OBJECTS = \
$(am_libatomic_convenience_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
@@ -185,6 +189,16 @@ am__v_at_1 =
depcomp = $(SHELL) $(top_srcdir)/../depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
+CPPASCOMPILE = $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS)
+LTCPPASCOMPILE = $(LIBTOOL) $(AM_V_lt) $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CCASFLAGS) $(CCASFLAGS)
+AM_V_CPPAS = $(am__v_CPPAS_@AM_V@)
+am__v_CPPAS_ = $(am__v_CPPAS_@AM_DEFAULT_V@)
+am__v_CPPAS_0 = @echo " CPPAS " $@;
+am__v_CPPAS_1 =
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
@@ -369,6 +383,7 @@ pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
+runstatedir = @runstatedir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
@@ -404,9 +419,8 @@ noinst_LTLIBRARIES = libatomic_convenience.la
@LIBAT_BUILD_VERSIONED_SHLIB_SUN_TRUE@@LIBAT_BUILD_VERSIONED_SHLIB_TRUE@libatomic_version_dep = libatomic.map-sun
libatomic_version_info = -version-info $(libtool_VERSION)
libatomic_la_LDFLAGS = $(libatomic_version_info) $(libatomic_version_script) $(lt_host_flags)
-libatomic_la_SOURCES = gload.c gstore.c gcas.c gexch.c glfree.c lock.c init.c \
- fenv.c fence.c flag.c
-
+libatomic_la_SOURCES = gload.c gstore.c gcas.c gexch.c glfree.c lock.c \
+ init.c fenv.c fence.c flag.c $(am__append_2)
SIZEOBJS = load store cas exch fadd fsub fand fior fxor fnand tas
EXTRA_libatomic_la_SOURCES = $(addsuffix _n.c,$(SIZEOBJS))
libatomic_la_DEPENDENCIES = $(libatomic_la_LIBADD) $(libatomic_version_dep)
@@ -432,8 +446,8 @@ all_c_files := $(foreach dir,$(search_path),$(wildcard $(dir)/*.c))
# Then sort through them to find the one we want, and select the first.
M_SRC = $(firstword $(filter %/$(M_FILE), $(all_c_files)))
libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \
- _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_2) \
- $(am__append_3) $(am__append_4)
+ _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_3) \
+ $(am__append_4) $(am__append_5)
@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse
@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64
@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586
@@ -450,7 +464,7 @@ all: auto-config.h
$(MAKE) $(AM_MAKEFLAGS) all-recursive
.SUFFIXES:
-.SUFFIXES: .c .lo .o .obj
+.SUFFIXES: .S .c .lo .o .obj
am--refresh: Makefile
@:
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/../multilib.am $(am__configure_deps)
@@ -559,6 +573,7 @@ mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic_16.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fence.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fenv.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/flag.Plo@am__quote@
@@ -570,6 +585,27 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/init.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@
+.S.o:
+@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ $<
+
+.S.obj:
+@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.S.lo:
+@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(LTCPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(LTCPPASCOMPILE) -c -o $@ $<
+
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S
new file mode 100644
index 0000000..bced729
--- /dev/null
+++ b/libatomic/config/linux/aarch64/atomic_16.S
@@ -0,0 +1,462 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+
+ This file is part of the GNU Atomic Library (libatomic).
+
+ Libatomic is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Libatomic is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+ .arch armv8-a+lse
+
+#define ENTRY(name) \
+ .global name; \
+ .hidden name; \
+ .type name,%function; \
+ .p2align 4; \
+name: \
+ .cfi_startproc; \
+ hint 34 // bti c
+
+#define END(name) \
+ .cfi_endproc; \
+ .size name, .-name;
+
+#define res0 x0
+#define res1 x1
+#define in0 x2
+#define in1 x3
+#define tmp0 x6
+#define tmp1 x7
+#define exp0 x8
+#define exp1 x9
+
+#ifdef __AARCH64EB__
+# define reslo x1
+# define reshi x0
+# define inlo x3
+# define inhi x2
+# define tmplo x7
+# define tmphi x6
+#else
+# define reslo x0
+# define reshi x1
+# define inlo x2
+# define inhi x3
+# define tmplo x6
+# define tmphi x7
+#endif
+
+#define RELAXED 0
+#define CONSUME 1
+#define ACQUIRE 2
+#define RELEASE 3
+#define ACQ_REL 4
+#define SEQ_CST 5
+
+
+ENTRY (libat_load_16_i1) /* 16-byte atomic load with LDP: x0 = address, w1 = memory order; value returned in res0:res1. */
+ cbnz w1, 1f
+ ldp res0, res1, [x0] /* RELAXED (order 0): a plain LDP, no barrier. */
+ ret
+1:
+ cmp w1, ACQUIRE
+ b.hi 2f
+ ldp res0, res1, [x0] /* CONSUME/ACQUIRE. */
+ dmb ishld /* Order the load before all later memory accesses. */
+ ret
+2: ldar tmp0, [x0] /* Orders above ACQUIRE (SEQ_CST): LDAR cannot be reordered before an earlier store-release (STLR), and as an acquire it keeps the LDP after it; tmp0 is discarded. */
+ ldp res0, res1, [x0]
+ dmb ishld /* Order the load before all later memory accesses. */
+ ret
+END (libat_load_16_i1)
+
+
+ENTRY (libat_store_16_i1) /* 16-byte atomic store with STP: x0 = address, in0:in1 = value, w4 = memory order. */
+ cbnz w4, 1f
+ stp in0, in1, [x0] /* RELAXED (order 0): a plain STP, no barrier. */
+ ret
+1:
+ dmb ish /* Every non-relaxed order: full barrier before the store. */
+ stp in0, in1, [x0]
+ cmp w4, SEQ_CST
+ beq 2f
+ ret
+2:
+ dmb ish /* SEQ_CST only: second full barrier after the store. */
+ ret
+END (libat_store_16_i1)
+
+
+ENTRY (libat_exchange_16_i1) /* 16-byte atomic exchange: x0 = address, in0:in1 = new value, w4 = memory order; old value returned in res0:res1. */
+ mov x5, x0 /* res0 is x0, so keep the address in x5. */
+ cbnz w4, 2f
+1: /* RELAXED (order 0): plain exclusive load and store. */
+ ldxp res0, res1, [x5]
+ stxp w4, in0, in1, [x5] /* w4 is reused as the store-exclusive status. */
+ cbnz w4, 1b /* Non-zero status: the exclusive store failed, retry. */
+ ret
+2:
+ cmp w4, ACQUIRE
+ b.hi 4f
+3: /* CONSUME/ACQUIRE: acquire load, plain store. */
+ ldaxp res0, res1, [x5]
+ stxp w4, in0, in1, [x5]
+ cbnz w4, 3b
+ ret
+4:
+ cmp w4, RELEASE
+ b.ne 6f
+5: /* RELEASE: plain load, release store. */
+ ldxp res0, res1, [x5]
+ stlxp w4, in0, in1, [x5]
+ cbnz w4, 5b
+ ret
+6: /* Remaining orders (ACQ_REL/SEQ_CST): acquire load, release store. */
+ ldaxp res0, res1, [x5]
+ stlxp w4, in0, in1, [x5]
+ cbnz w4, 6b
+ ret
+END (libat_exchange_16_i1)
+
+
+ENTRY (libat_compare_exchange_16_i1) /* 16-byte compare-and-swap with CASP: x0 = address, x1 = pointer to the expected value, in0:in1 = desired value, w4 = memory order (the only order argument read here); returns 1 in x0 on success, 0 on failure. */
+ ldp exp0, exp1, [x1]
+ mov tmp0, exp0 /* Keep a copy of the expected value: CASP overwrites exp0:exp1 with the value it read. */
+ mov tmp1, exp1
+ cbz w4, 2f
+ cmp w4, RELEASE
+ b.hs 3f
+ caspa exp0, exp1, in0, in1, [x0] /* CONSUME/ACQUIRE. */
+0:
+ cmp exp0, tmp0 /* Shared tail: success iff the value read equals the expected value. */
+ ccmp exp1, tmp1, 0, eq
+ bne 1f
+ mov x0, 1
+ ret
+1:
+ stp exp0, exp1, [x1] /* Failure: write the observed value back through x1. */
+ mov x0, 0
+ ret
+2:
+ casp exp0, exp1, in0, in1, [x0] /* RELAXED (order 0). */
+ b 0b
+3:
+ b.hi 4f /* Flags still come from the cmp against RELEASE above. */
+ caspl exp0, exp1, in0, in1, [x0] /* RELEASE. */
+ b 0b
+4:
+ caspal exp0, exp1, in0, in1, [x0] /* Orders above RELEASE (ACQ_REL/SEQ_CST). */
+ b 0b
+END (libat_compare_exchange_16_i1)
+
+
+ENTRY (libat_fetch_add_16_i1) /* 16-byte atomic add: x0 = address, in0:in1 = addend, w4 = memory order; returns the old value in res0:res1. */
+ mov x5, x0 /* res0 is x0, so keep the address in x5. */
+ cbnz w4, 2f
+1: /* RELAXED (order 0): plain exclusive load and store. */
+ ldxp res0, res1, [x5]
+ adds tmplo, reslo, inlo /* Low halves first (the lo/hi aliases depend on __AARCH64EB__), setting carry. */
+ adc tmphi, reshi, inhi /* High halves plus carry. */
+ stxp w4, tmp0, tmp1, [x5] /* w4 is reused as the store-exclusive status. */
+ cbnz w4, 1b /* Non-zero status: the exclusive store failed, retry. */
+ ret
+2: /* Every other order: acquire load, release store. */
+ ldaxp res0, res1, [x5]
+ adds tmplo, reslo, inlo
+ adc tmphi, reshi, inhi
+ stlxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_fetch_add_16_i1)
+
+
+ENTRY (libat_add_fetch_16_i1) /* 16-byte atomic add: x0 = address, in0:in1 = addend, w4 = memory order; returns the new value in res0:res1. */
+ mov x5, x0 /* res0 is x0, so keep the address in x5. */
+ cbnz w4, 2f
+1: /* RELAXED (order 0): plain exclusive load and store. */
+ ldxp res0, res1, [x5]
+ adds reslo, reslo, inlo /* Sum is built in place in the result registers; low halves first, setting carry. */
+ adc reshi, reshi, inhi /* High halves plus carry. */
+ stxp w4, res0, res1, [x5] /* w4 is reused as the store-exclusive status. */
+ cbnz w4, 1b /* Non-zero status: the exclusive store failed, retry. */
+ ret
+2: /* Every other order: acquire load, release store. */
+ ldaxp res0, res1, [x5]
+ adds reslo, reslo, inlo
+ adc reshi, reshi, inhi
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_add_fetch_16_i1)
+
+
+ENTRY (libat_fetch_sub_16_i1) /* 16-byte atomic subtract: x0 = address, in0:in1 = subtrahend, w4 = memory order; returns the old value in res0:res1. */
+ mov x5, x0 /* res0 is x0, so keep the address in x5. */
+ cbnz w4, 2f
+1: /* RELAXED (order 0): plain exclusive load and store. */
+ ldxp res0, res1, [x5]
+ subs tmplo, reslo, inlo /* Low halves first (the lo/hi aliases depend on __AARCH64EB__), setting the borrow flag. */
+ sbc tmphi, reshi, inhi /* High halves with borrow. */
+ stxp w4, tmp0, tmp1, [x5] /* w4 is reused as the store-exclusive status. */
+ cbnz w4, 1b /* Non-zero status: the exclusive store failed, retry. */
+ ret
+2: /* Every other order: acquire load, release store. */
+ ldaxp res0, res1, [x5]
+ subs tmplo, reslo, inlo
+ sbc tmphi, reshi, inhi
+ stlxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_fetch_sub_16_i1)
+
+
+ENTRY (libat_sub_fetch_16_i1) /* 16-byte atomic subtract: x0 = address, in0:in1 = subtrahend, w4 = memory order; returns the new value in res0:res1. */
+ mov x5, x0 /* res0 is x0, so keep the address in x5. */
+ cbnz w4, 2f
+1: /* RELAXED (order 0): plain exclusive load and store. */
+ ldxp res0, res1, [x5]
+ subs reslo, reslo, inlo /* Difference is built in place in the result registers; low halves first. */
+ sbc reshi, reshi, inhi /* High halves with borrow. */
+ stxp w4, res0, res1, [x5] /* w4 is reused as the store-exclusive status. */
+ cbnz w4, 1b /* Non-zero status: the exclusive store failed, retry. */
+ ret
+2: /* Every other order: acquire load, release store. */
+ ldaxp res0, res1, [x5]
+ subs reslo, reslo, inlo
+ sbc reshi, reshi, inhi
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_sub_fetch_16_i1)
+
+
+ENTRY (libat_fetch_or_16_i1) /* 16-byte atomic OR: x0 = address, in0:in1 = operand, w4 = memory order; returns the old value in res0:res1. */
+ mov x5, x0 /* res0 is x0, so keep the address in x5. */
+ cbnz w4, 2f
+1: /* RELAXED (order 0): plain exclusive load and store. */
+ ldxp res0, res1, [x5]
+ orr tmp0, res0, in0 /* New value goes to tmp0:tmp1 so the old value survives in res0:res1. */
+ orr tmp1, res1, in1
+ stxp w4, tmp0, tmp1, [x5] /* w4 is reused as the store-exclusive status. */
+ cbnz w4, 1b /* Non-zero status: the exclusive store failed, retry. */
+ ret
+2: /* Every other order: acquire load, release store. */
+ ldaxp res0, res1, [x5]
+ orr tmp0, res0, in0
+ orr tmp1, res1, in1
+ stlxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_fetch_or_16_i1)
+
+
+ENTRY (libat_or_fetch_16_i1) /* 16-byte atomic OR: x0 = address, in0:in1 = operand, w4 = memory order; returns the new value in res0:res1. */
+ mov x5, x0 /* res0 is x0, so keep the address in x5. */
+ cbnz w4, 2f
+1: /* RELAXED (order 0): plain exclusive load and store. */
+ ldxp res0, res1, [x5]
+ orr res0, res0, in0 /* New value is built in place in the result registers. */
+ orr res1, res1, in1
+ stxp w4, res0, res1, [x5] /* w4 is reused as the store-exclusive status. */
+ cbnz w4, 1b /* Non-zero status: the exclusive store failed, retry. */
+ ret
+2: /* Every other order: acquire load, release store. */
+ ldaxp res0, res1, [x5]
+ orr res0, res0, in0
+ orr res1, res1, in1
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_or_fetch_16_i1)
+
+
+ENTRY (libat_fetch_and_16_i1) /* 16-byte atomic AND: x0 = address, in0:in1 = operand, w4 = memory order; returns the old value in res0:res1. */
+ mov x5, x0 /* res0 is x0, so keep the address in x5. */
+ cbnz w4, 2f
+1: /* RELAXED (order 0): plain exclusive load and store. */
+ ldxp res0, res1, [x5]
+ and tmp0, res0, in0 /* New value goes to tmp0:tmp1 so the old value survives in res0:res1. */
+ and tmp1, res1, in1
+ stxp w4, tmp0, tmp1, [x5] /* w4 is reused as the store-exclusive status. */
+ cbnz w4, 1b /* Non-zero status: the exclusive store failed, retry. */
+ ret
+2: /* Every other order: acquire load, release store. */
+ ldaxp res0, res1, [x5]
+ and tmp0, res0, in0
+ and tmp1, res1, in1
+ stlxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_fetch_and_16_i1)
+
+
+ENTRY (libat_and_fetch_16_i1) /* 16-byte atomic AND: x0 = address, in0:in1 = operand, w4 = memory order; returns the new value in res0:res1. */
+ mov x5, x0 /* res0 is x0, so keep the address in x5. */
+ cbnz w4, 2f
+1: /* RELAXED (order 0): plain exclusive load and store. */
+ ldxp res0, res1, [x5]
+ and res0, res0, in0 /* New value is built in place in the result registers. */
+ and res1, res1, in1
+ stxp w4, res0, res1, [x5] /* w4 is reused as the store-exclusive status. */
+ cbnz w4, 1b /* Non-zero status: the exclusive store failed, retry. */
+ ret
+2: /* Every other order: acquire load, release store. */
+ ldaxp res0, res1, [x5]
+ and res0, res0, in0
+ and res1, res1, in1
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_and_fetch_16_i1)
+
+
+ENTRY (libat_fetch_xor_16_i1) /* 16-byte atomic XOR: x0 = address, in0:in1 = operand, w4 = memory order; returns the old value in res0:res1. */
+ mov x5, x0 /* res0 is x0, so keep the address in x5. */
+ cbnz w4, 2f
+1: /* RELAXED (order 0): plain exclusive load and store. */
+ ldxp res0, res1, [x5]
+ eor tmp0, res0, in0 /* New value goes to tmp0:tmp1 so the old value survives in res0:res1. */
+ eor tmp1, res1, in1
+ stxp w4, tmp0, tmp1, [x5] /* w4 is reused as the store-exclusive status. */
+ cbnz w4, 1b /* Non-zero status: the exclusive store failed, retry. */
+ ret
+2: /* Every other order: acquire load, release store. */
+ ldaxp res0, res1, [x5]
+ eor tmp0, res0, in0
+ eor tmp1, res1, in1
+ stlxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_fetch_xor_16_i1)
+
+
+ENTRY (libat_xor_fetch_16_i1) /* 16-byte atomic XOR: x0 = address, in0:in1 = operand, w4 = memory order; returns the new value in res0:res1. */
+ mov x5, x0 /* res0 is x0, so keep the address in x5. */
+ cbnz w4, 2f
+1: /* RELAXED (order 0): plain exclusive load and store. */
+ ldxp res0, res1, [x5]
+ eor res0, res0, in0 /* New value is built in place in the result registers. */
+ eor res1, res1, in1
+ stxp w4, res0, res1, [x5] /* w4 is reused as the store-exclusive status. */
+ cbnz w4, 1b /* Non-zero status: the exclusive store failed, retry. */
+ ret
+2: /* Every other order: acquire load, release store. */
+ ldaxp res0, res1, [x5]
+ eor res0, res0, in0
+ eor res1, res1, in1
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_xor_fetch_16_i1)
+
+
+ENTRY (libat_fetch_nand_16_i1) /* 16-byte atomic NAND: x0 = address, in0:in1 = operand, w4 = memory order; returns the old value in res0:res1. */
+ mov x5, x0 /* res0 is x0, so keep the address in x5. */
+ mvn in0, in0 /* Invert the operand once, outside the retry loop. */
+ mvn in1, in1
+ cbnz w4, 2f
+1: /* RELAXED (order 0): plain exclusive load and store. */
+ ldxp res0, res1, [x5]
+ orn tmp0, in0, res0 /* ~operand | ~old, i.e. ~(old & operand); old value survives in res0:res1. */
+ orn tmp1, in1, res1
+ stxp w4, tmp0, tmp1, [x5] /* w4 is reused as the store-exclusive status. */
+ cbnz w4, 1b /* Non-zero status: the exclusive store failed, retry. */
+ ret
+2: /* Every other order: acquire load, release store. */
+ ldaxp res0, res1, [x5]
+ orn tmp0, in0, res0
+ orn tmp1, in1, res1
+ stlxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_fetch_nand_16_i1)
+
+
+ENTRY (libat_nand_fetch_16_i1) /* 16-byte atomic NAND: x0 = address, in0:in1 = operand, w4 = memory order; returns the new value in res0:res1. */
+ mov x5, x0 /* res0 is x0, so keep the address in x5. */
+ mvn in0, in0 /* Invert the operand once, outside the retry loop. */
+ mvn in1, in1
+ cbnz w4, 2f
+1: /* RELAXED (order 0): plain exclusive load and store. */
+ ldxp res0, res1, [x5]
+ orn res0, in0, res0 /* ~operand | ~old, i.e. ~(old & operand), built in place in the result registers. */
+ orn res1, in1, res1
+ stxp w4, res0, res1, [x5] /* w4 is reused as the store-exclusive status. */
+ cbnz w4, 1b /* Non-zero status: the exclusive store failed, retry. */
+ ret
+2: /* Every other order: acquire load, release store. */
+ ldaxp res0, res1, [x5]
+ orn res0, in0, res0
+ orn res1, in1, res1
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
+ ret
+END (libat_nand_fetch_16_i1)
+
+
+ENTRY (libat_test_and_set_16_i1) /* Atomic test-and-set of the byte at x0: w1 = memory order; stores 1 and returns the previous byte in w0. */
+ mov w2, 1 /* Value to store. */
+ cbnz w1, 2f
+ swpb w2, w0, [x0] /* RELAXED (order 0).  SWPB Ws, Wt, [Xn] stores Ws (w2 = 1) and loads the old byte into Wt (w0). */
+ ret
+
+2: swpalb w2, w0, [x0] /* Every other order: swap with acquire and release semantics. */
+ ret
+END (libat_test_and_set_16_i1)
+
+
+/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code. */
+#define FEATURE_1_AND 0xc0000000
+#define FEATURE_1_BTI 1
+#define FEATURE_1_PAC 2
+
+/* Supported features based on the code generation options. */
+#if defined(__ARM_FEATURE_BTI_DEFAULT)
+# define BTI_FLAG FEATURE_1_BTI
+#else
+# define BTI_FLAG 0
+#endif
+
+#if __ARM_FEATURE_PAC_DEFAULT & 3
+# define PAC_FLAG FEATURE_1_PAC
+#else
+# define PAC_FLAG 0
+#endif
+
+/* Add a NT_GNU_PROPERTY_TYPE_0 note. */
+#define GNU_PROPERTY(type, value) \
+ .section .note.gnu.property, "a"; \
+ .p2align 3; \
+ .word 4; \
+ .word 16; \
+ .word 5; \
+ .asciz "GNU"; \
+ .word type; \
+ .word 4; \
+ .word value; \
+ .word 0;
+
+#if defined(__linux__) || defined(__FreeBSD__)
+.section .note.GNU-stack, "", %progbits
+
+/* Add GNU property note if built with branch protection. */
+# if (BTI_FLAG|PAC_FLAG) != 0
+GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
+# endif
+#endif
diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h
index 769ba6e..d9b5ab3 100644
--- a/libatomic/config/linux/aarch64/host-config.h
+++ b/libatomic/config/linux/aarch64/host-config.h
@@ -22,14 +22,22 @@
<http://www.gnu.org/licenses/>. */
#if HAVE_IFUNC
-#include <stdlib.h>
+#include <sys/auxv.h>
-# ifdef HWCAP_ATOMICS
-# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS)
+#ifdef HWCAP_USCAT
+# if N == 16
+# define IFUNC_COND_1 (hwcap & HWCAP_USCAT)
# else
-# define IFUNC_COND_1 (false)
+# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS)
# endif
-# define IFUNC_NCOND(N) (1)
+#else
+# define IFUNC_COND_1 (false)
+#endif
+#define IFUNC_NCOND(N) (1)
+
+#if N == 16 && IFUNC_ALT != 0
+# define DONE 1
+#endif
#endif /* HAVE_IFUNC */
diff --git a/libatomic/configure.tgt b/libatomic/configure.tgt
index 86a5947..57f0936 100644
--- a/libatomic/configure.tgt
+++ b/libatomic/configure.tgt
@@ -49,6 +49,7 @@ case "${target_cpu}" in
fi
;;
esac
+ XCFLAGS="${XCFLAGS} -mno-outline-atomics"
;;
arm*)
ARCH=arm