aboutsummaryrefslogtreecommitdiff
path: root/libatomic
diff options
context:
space:
mode:
authorVictor Do Nascimento <victor.donascimento@arm.com>2024-06-10 11:10:36 +0100
committerVictor Do Nascimento <victor.donascimento@arm.com>2024-06-25 11:48:38 +0100
commit7107574958e2bed11d916a1480ef1319f15e5ffe (patch)
treef8be67d195c33b03b84ceb7616d1714de2388649 /libatomic
parentd4db77ce37a65207baea88859fd9c191469187f8 (diff)
downloadgcc-7107574958e2bed11d916a1480ef1319f15e5ffe.zip
gcc-7107574958e2bed11d916a1480ef1319f15e5ffe.tar.gz
gcc-7107574958e2bed11d916a1480ef1319f15e5ffe.tar.bz2
libatomic: Add rcpc3 128-bit atomic operations for AArch64
The introduction of the optional RCPC3 architectural extension for Armv8.2-A upwards provides additional support for the release consistency model, introducing the Load-Acquire RCpc Pair Ordered, and Store-Release Pair Ordered operations in the form of LDIAPP and STILP. These operations are single-copy atomic on cores which also implement LSE2 and, as such, support for these operations is added to Libatomic and employed accordingly when the LSE2 and RCPC3 features are detected in a given core at runtime. libatomic/ChangeLog: * config/linux/aarch64/atomic_16.S (libat_load_16): Add LRCPC3 variant. (libat_store_16): Likewise. * config/linux/aarch64/host-config.h (HWCAP2_LRCPC3): New. (LSE2_LRCPC3_ATOP): Previously LSE2_ATOP. New ifuncs guarded under it. (has_rcpc3): New.
Diffstat (limited to 'libatomic')
-rw-r--r--libatomic/config/linux/aarch64/atomic_16.S46
-rw-r--r--libatomic/config/linux/aarch64/host-config.h34
2 files changed, 74 insertions, 6 deletions
diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S
index c44c31c..5767fba 100644
--- a/libatomic/config/linux/aarch64/atomic_16.S
+++ b/libatomic/config/linux/aarch64/atomic_16.S
@@ -35,16 +35,21 @@
writes, this will be true when using atomics in actual code.
The libat_<op>_16 entry points are ARMv8.0.
- The libat_<op>_16_i1 entry points are used when LSE128 is available.
+ The libat_<op>_16_i1 entry points are used when LSE128 or LRCPC3 is available.
The libat_<op>_16_i2 entry points are used when LSE2 is available. */
#include "auto-config.h"
.arch armv8-a+lse
+/* There is overlap in atomic instructions implemented in RCPC3 and LSE2.
+ Consequently, both _i1 and _i2 suffixes are needed for functions using these.
+ Elsewhere, all extension-specific implementations are mapped to _i1. */
+
+#define LRCPC3(NAME) libat_##NAME##_i1
#define LSE128(NAME) libat_##NAME##_i1
#define LSE(NAME) libat_##NAME##_i1
-#define LSE2(NAME) libat_##NAME##_i1
+#define LSE2(NAME) libat_##NAME##_i2
#define CORE(NAME) libat_##NAME
#define ATOMIC(NAME) __atomic_##NAME
@@ -513,6 +518,43 @@ END (test_and_set_16)
/* ifunc implementations: Carries run-time dependence on the presence of further
architectural extensions. */
+ENTRY_FEAT (load_16, LRCPC3)
+ cbnz w1, 1f
+
+ /* RELAXED. */
+ ldp res0, res1, [x0]
+ ret
+1:
+ cmp w1, SEQ_CST
+ b.eq 2f
+
+ /* ACQUIRE/CONSUME (Load-AcquirePC semantics). */
+ /* ldiapp res0, res1, [x0] */
+ .inst 0xd9411800
+ ret
+
+ /* SEQ_CST. */
+2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */
+ /* ldiapp res0, res1, [x0] */
+ .inst 0xd9411800
+ ret
+END_FEAT (load_16, LRCPC3)
+
+
+ENTRY_FEAT (store_16, LRCPC3)
+ cbnz w4, 1f
+
+ /* RELAXED. */
+ stp in0, in1, [x0]
+ ret
+
+ /* RELEASE/SEQ_CST. */
+1: /* stilp in0, in1, [x0] */
+ .inst 0xd9031802
+ ret
+END_FEAT (store_16, LRCPC3)
+
+
ENTRY_FEAT (exchange_16, LSE128)
mov tmp0, x0
mov res0, in0
diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h
index d05e9eb..93f367d 100644
--- a/libatomic/config/linux/aarch64/host-config.h
+++ b/libatomic/config/linux/aarch64/host-config.h
@@ -33,6 +33,9 @@
#ifndef HWCAP_USCAT
# define HWCAP_USCAT (1 << 25)
#endif
+#ifndef HWCAP2_LRCPC3
+# define HWCAP2_LRCPC3 (1UL << 46)
+#endif
#ifndef HWCAP2_LSE128
# define HWCAP2_LSE128 (1UL << 47)
#endif
@@ -54,7 +57,7 @@ typedef struct __ifunc_arg_t {
#if defined (LAT_CAS_N)
# define LSE_ATOP
#elif defined (LAT_LOAD_N) || defined (LAT_STORE_N)
-# define LSE2_ATOP
+# define LSE2_LRCPC3_ATOP
#elif defined (LAT_EXCH_N) || defined (LAT_FIOR_N) || defined (LAT_FAND_N)
# define LSE128_ATOP
#endif
@@ -63,9 +66,10 @@ typedef struct __ifunc_arg_t {
# if defined (LSE_ATOP)
# define IFUNC_NCOND(N) 1
# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS)
-# elif defined (LSE2_ATOP)
-# define IFUNC_NCOND(N) 1
-# define IFUNC_COND_1 (has_lse2 (hwcap, features))
+# elif defined (LSE2_LRCPC3_ATOP)
+# define IFUNC_NCOND(N) 2
+# define IFUNC_COND_1 (has_rcpc3 (hwcap, features))
+# define IFUNC_COND_2 (has_lse2 (hwcap, features))
# elif defined (LSE128_ATOP)
# define IFUNC_NCOND(N) 1
# define IFUNC_COND_1 (has_lse128 (hwcap, features))
@@ -131,6 +135,28 @@ has_lse128 (unsigned long hwcap, const __ifunc_arg_t *features)
return false;
}
+/* LRCPC3 support is encoded in ID_AA64ISAR1_EL1.LRCPC, bits[23:20]. A field
+ value of 0b0011 or greater indicates that LRCPC3 is implemented. Check that. */
+
+static inline bool
+has_rcpc3 (unsigned long hwcap, const __ifunc_arg_t *features)
+{
+ if (hwcap & _IFUNC_ARG_HWCAP
+ && features->_hwcap2 & HWCAP2_LRCPC3)
+ return true;
+ /* The HWCAP2 check did not confirm LRCPC3, so fall back to the ID register.
+
+ In the absence of HWCAP_CPUID we cannot check for RCPC3, so report it absent.
+ The LSE2 prerequisite (HWCAP_USCAT) is checked on this fallback path only. */
+ if (!(hwcap & HWCAP_CPUID) || !(hwcap & HWCAP_USCAT))
+ return false;
+ unsigned long isar1;
+ asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (isar1));
+ if (AT_FEAT_FIELD (isar1) >= 3)
+ return true;
+ return false;
+}
+
#endif /* HAVE_IFUNC */
/* All 128-bit atomic functions are defined in aarch64/atomic_16.S. */