about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- libatomic/Makefile.am 5
-rw-r--r-- libatomic/Makefile.in 6
-rw-r--r-- libatomic/config/x86/host-config.h 43
-rw-r--r-- libatomic/config/x86/init.c 12
4 files changed, 55 insertions, 11 deletions
diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am
index 389f3dd..d88515e 100644
--- a/libatomic/Makefile.am
+++ b/libatomic/Makefile.am
@@ -138,8 +138,9 @@ IFUNC_OPTIONS = -march=i586
libatomic_la_LIBADD += $(addsuffix _8_1_.lo,$(SIZEOBJS))
endif
if ARCH_X86_64
-IFUNC_OPTIONS = -mcx16
-libatomic_la_LIBADD += $(addsuffix _16_1_.lo,$(SIZEOBJS))
+IFUNC_OPTIONS = -mcx16 -mcx16
+libatomic_la_LIBADD += $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
+ $(addsuffix _16_2_.lo,$(SIZEOBJS))
endif
endif
diff --git a/libatomic/Makefile.in b/libatomic/Makefile.in
index 0a51bd5..80d2565 100644
--- a/libatomic/Makefile.in
+++ b/libatomic/Makefile.in
@@ -96,7 +96,9 @@ target_triplet = @target@
@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix \
@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _8_2_.lo,$(SIZEOBJS))
@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(addsuffix _8_1_.lo,$(SIZEOBJS))
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS))
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix _16_2_.lo,$(SIZEOBJS))
+
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
@@ -435,7 +437,7 @@ libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \
@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse
@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64
@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586
-@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -mcx16
+@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -mcx16 -mcx16
libatomic_convenience_la_SOURCES = $(libatomic_la_SOURCES)
libatomic_convenience_la_LIBADD = $(libatomic_la_LIBADD)
MULTISRCTOP =
diff --git a/libatomic/config/x86/host-config.h b/libatomic/config/x86/host-config.h
index f20ce09..007b7e1 100644
--- a/libatomic/config/x86/host-config.h
+++ b/libatomic/config/x86/host-config.h
@@ -55,31 +55,37 @@ load_feat1 (void)
}
#ifdef __x86_64__
-# define IFUNC_COND_1 (load_feat1 () & bit_CMPXCHG16B)
+# define IFUNC_COND_1 ((load_feat1 () & (bit_AVX | bit_CMPXCHG16B)) \
+ == (bit_AVX | bit_CMPXCHG16B))
+# define IFUNC_COND_2 (load_feat1 () & bit_CMPXCHG16B)
#else
# define IFUNC_COND_1 (load_feat1 () & bit_CMPXCHG8B)
#endif
#ifdef __x86_64__
-# define IFUNC_NCOND(N) (N == 16)
+# define IFUNC_NCOND(N) (2 * (N == 16))
#else
# define IFUNC_NCOND(N) (N == 8)
#endif
#ifdef __x86_64__
# undef MAYBE_HAVE_ATOMIC_CAS_16
-# define MAYBE_HAVE_ATOMIC_CAS_16 IFUNC_COND_1
+# define MAYBE_HAVE_ATOMIC_CAS_16 IFUNC_COND_2
# undef MAYBE_HAVE_ATOMIC_EXCHANGE_16
-# define MAYBE_HAVE_ATOMIC_EXCHANGE_16 IFUNC_COND_1
+# define MAYBE_HAVE_ATOMIC_EXCHANGE_16 IFUNC_COND_2
# undef MAYBE_HAVE_ATOMIC_LDST_16
-# define MAYBE_HAVE_ATOMIC_LDST_16 IFUNC_COND_1
+# define MAYBE_HAVE_ATOMIC_LDST_16 IFUNC_COND_2
/* Since load and store are implemented with CAS, they are not fast. */
# undef FAST_ATOMIC_LDST_16
# define FAST_ATOMIC_LDST_16 0
-# if IFUNC_ALT == 1
+# if IFUNC_ALT != 0
# undef HAVE_ATOMIC_CAS_16
# define HAVE_ATOMIC_CAS_16 1
# endif
+# if IFUNC_ALT == 1
+# undef HAVE_ATOMIC_LDST_16
+# define HAVE_ATOMIC_LDST_16 1
+# endif
#else
# undef MAYBE_HAVE_ATOMIC_CAS_8
# define MAYBE_HAVE_ATOMIC_CAS_8 IFUNC_COND_1
@@ -93,7 +99,7 @@ load_feat1 (void)
# endif
#endif
-#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
+#if defined(__x86_64__) && N == 16 && IFUNC_ALT != 0
static inline bool
atomic_compare_exchange_n (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
bool weak_p UNUSED, int sm UNUSED, int fm UNUSED)
@@ -108,6 +114,29 @@ atomic_compare_exchange_n (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
# define atomic_compare_exchange_n atomic_compare_exchange_n
#endif /* Have CAS 16 */
+#if defined(__x86_64__) && N == 16 && IFUNC_ALT == 1
+#define __atomic_load_n(ptr, model) \
+ (sizeof (*ptr) == 16 ? atomic_load_n (ptr, model) \
+ : (__atomic_load_n) (ptr, model))
+#define __atomic_store_n(ptr, val, model) \
+ (sizeof (*ptr) == 16 ? atomic_store_n (ptr, val, model) \
+ : (__atomic_store_n) (ptr, val, model))
+
+static inline UTYPE
+atomic_load_n (UTYPE *ptr, int model UNUSED)
+{
+ UTYPE ret;
+ __asm__ ("vmovdqa\t{%1, %0|%0, %1}" : "=x" (ret) : "m" (*ptr));
+ return ret;
+}
+
+static inline void
+atomic_store_n (UTYPE *ptr, UTYPE val, int model UNUSED)
+{
+ __asm__ ("vmovdqa\t{%1, %0|%0, %1}\n\tmfence" : "=m" (*ptr) : "x" (val));
+}
+#endif
+
#endif /* HAVE_IFUNC */
#include_next <host-config.h>
diff --git a/libatomic/config/x86/init.c b/libatomic/config/x86/init.c
index 7bdec72..6f6499c 100644
--- a/libatomic/config/x86/init.c
+++ b/libatomic/config/x86/init.c
@@ -34,6 +34,18 @@ __libat_feat1_init (void)
unsigned int eax, ebx, ecx, edx;
FEAT1_REGISTER = 0;
__get_cpuid (1, &eax, &ebx, &ecx, &edx);
+#ifdef __x86_64__
+ if ((FEAT1_REGISTER & (bit_AVX | bit_CMPXCHG16B))
+ == (bit_AVX | bit_CMPXCHG16B))
+ {
+ /* Intel SDM guarantees that 16-byte VMOVDQA on 16-byte aligned address
+ is atomic, but so far we don't have this guarantee from AMD. */
+ unsigned int ecx2 = 0;
+ __get_cpuid (0, &eax, &ebx, &ecx2, &edx);
+ if (ecx2 != signature_INTEL_ecx)
+ FEAT1_REGISTER &= ~bit_AVX;
+ }
+#endif
/* See the load in load_feat1. */
__atomic_store_n (&__libat_feat1, FEAT1_REGISTER, __ATOMIC_RELAXED);
return FEAT1_REGISTER;