diff options
author | Simon Tatham <simon.tatham@arm.com> | 2025-08-05 08:45:54 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-08-05 08:45:54 +0100 |
commit | 87283db54852a899a52e4e01aa01b471ffa34a1d (patch) | |
tree | c29113576bc90ba89cb95eba3bcea8ca8810117a | |
parent | c5e6938c6710a1c34a32830c743f19a29ffef6e5 (diff) | |
download | llvm-87283db54852a899a52e4e01aa01b471ffa34a1d.zip llvm-87283db54852a899a52e4e01aa01b471ffa34a1d.tar.gz llvm-87283db54852a899a52e4e01aa01b471ffa34a1d.tar.bz2 |
[clang][ARM] Fix build failure in <arm_acle.h> for __swp (#151354)
In commit d5985905ae8e5b2 I introduced a Sema check that prohibits
`__builtin_arm_ldrex` and `__builtin_arm_strex` for data sizes not
supported by the target architecture version. However, `arm_acle.h`
unconditionally uses those builtins with a 32-bit data size. So now
including that header will cause a build failure on Armv6-M, or historic
architectures like Armv5.
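To fix it, `arm_acle.h` now queries the compiler-defined
`__ARM_FEATURE_LDREX` macro (also fixed recently in commit
34f59d79209268e so that it matches the target architecture). If 32-bit
LDREX is available, `__swp` uses `__atomic_exchange_n`, which compiles to
an LDREX/STREX loop. If it isn't, `__swp` falls back to the older SWP
instruction via inline assembler, with two exceptions where the same
builtin is used and becomes a libcall to `__atomic_exchange_4`: Armv6-M
(which has no SWP either), and Linux (whose runtime is expected to
provide that libcall).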
While I was modifying the header anyway, I also renamed the local
variable `v` inside `__swp` so that it starts with `__`, avoiding any
risk of user code having #defined `v`.
-rw-r--r-- | clang/lib/Headers/arm_acle.h | 36 | ||||
-rw-r--r-- | clang/test/CodeGen/arm_acle.c | 27 | ||||
-rw-r--r-- | clang/test/CodeGen/arm_acle_swp.c | 17 |
3 files changed, 52 insertions, 28 deletions
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h index 5cfa3d0..fcc2075 100644 --- a/clang/lib/Headers/arm_acle.h +++ b/clang/lib/Headers/arm_acle.h @@ -55,11 +55,37 @@ __chkfeat(uint64_t __features) { /* 7.5 Swap */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __swp(uint32_t __x, volatile uint32_t *__p) { - uint32_t v; - do - v = __builtin_arm_ldrex(__p); - while (__builtin_arm_strex(__x, __p)); - return v; + uint32_t __v; +#if (__ARM_FEATURE_LDREX & 4) || __ARM_ARCH_6M__ || __linux__ + /* + * Using this clang builtin is sensible in most situations. Where + * LDREX and STREX are available, it will compile to a loop using + * them. Otherwise it will compile to a libcall, requiring the + * runtime to provide that library function. + * + * That's unavoidable on Armv6-M, which has no atomic instructions + * at all (not even SWP), so in that situation the user will just + * have to provide an implementation of __atomic_exchange_4 (perhaps + * it would temporarily disable interrupts, and then do a separate + * load and store). + * + * We also use the libcall strategy on pre-Armv7 Linux targets, on + * the theory that Linux's runtime support library _will_ provide a + * suitable libcall, and it's better to use that than the SWP + * instruction because then when the same binary is run on a later + * Linux system the libcall implementation will use LDREX instead. + */ + __v = __atomic_exchange_n(__p, __x, __ATOMIC_RELAXED); +#else + /* + * But for older Arm architectures when the target is not Linux, we + * fall back to using the SWP instruction via inline assembler. ACLE + * is clear that we're allowed to do this, but shouldn't do it if we + * have a better alternative. 
+ */ + __asm__("swp %0, %1, [%2]" : "=r"(__v) : "r"(__x), "r"(__p) : "memory"); +#endif + return __v; } /* 7.6 Memory prefetch intrinsics */ diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c index 74de824..0f539cb 100644 --- a/clang/test/CodeGen/arm_acle.c +++ b/clang/test/CodeGen/arm_acle.c @@ -139,29 +139,10 @@ void test_dbg(void) { #endif /* 8.5 Swap */ -// AArch32-LABEL: @test_swp( -// AArch32-NEXT: entry: -// AArch32-NEXT: br label [[DO_BODY_I:%.*]] -// AArch32: do.body.i: -// AArch32-NEXT: [[LDREX_I:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) [[P:%.*]]) -// AArch32-NEXT: [[STREX_I:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[X:%.*]], ptr elementtype(i32) [[P]]) -// AArch32-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[STREX_I]], 0 -// AArch32-NEXT: br i1 [[TOBOOL_I]], label [[DO_BODY_I]], label [[__SWP_EXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]] -// AArch32: __swp.exit: -// AArch32-NEXT: ret void -// -// AArch64-LABEL: @test_swp( -// AArch64-NEXT: entry: -// AArch64-NEXT: br label [[DO_BODY_I:%.*]] -// AArch64: do.body.i: -// AArch64-NEXT: [[LDXR_I:%.*]] = call i64 @llvm.aarch64.ldxr.p0(ptr elementtype(i32) [[P:%.*]]) -// AArch64-NEXT: [[TMP0:%.*]] = trunc i64 [[LDXR_I]] to i32 -// AArch64-NEXT: [[TMP1:%.*]] = zext i32 [[X:%.*]] to i64 -// AArch64-NEXT: [[STXR_I:%.*]] = call i32 @llvm.aarch64.stxr.p0(i64 [[TMP1]], ptr elementtype(i32) [[P]]) -// AArch64-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[STXR_I]], 0 -// AArch64-NEXT: br i1 [[TOBOOL_I]], label [[DO_BODY_I]], label [[__SWP_EXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]] -// AArch64: __swp.exit: -// AArch64-NEXT: ret void +// ARM-LABEL: @test_swp( +// ARM-NEXT: entry: +// ARM-NEXT: [[TMP0:%.*]] = atomicrmw volatile xchg ptr [[P:%.*]], i32 [[X:%.*]] monotonic, align 4 +// ARM-NEXT: ret void // void test_swp(uint32_t x, volatile void *p) { __swp(x, p); diff --git a/clang/test/CodeGen/arm_acle_swp.c b/clang/test/CodeGen/arm_acle_swp.c new file mode 100644 index 0000000..15fb49d --- /dev/null 
+++ b/clang/test/CodeGen/arm_acle_swp.c @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -ffreestanding -triple thumbv7m-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=ATOMIC +// RUN: %clang_cc1 -ffreestanding -triple armv7a-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=ATOMIC +// RUN: %clang_cc1 -ffreestanding -triple armv6-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=ATOMIC +// RUN: %clang_cc1 -ffreestanding -triple thumbv6m-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=ATOMIC +// RUN: %clang_cc1 -ffreestanding -triple armv5-unknown-linux-gnu -target-abi aapcs -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=ATOMIC +// RUN: %clang_cc1 -ffreestanding -triple armv5-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefix=SWP + +// REQUIRES: arm-registered-target + +#include <arm_acle.h> + +// SWP: call i32 asm "swp $0, $1, [$2]", "=r,r,r,~{memory}"(i32 {{.*}}, ptr {{.*}}) + +// ATOMIC: atomicrmw volatile xchg ptr {{.*}}, i32 {{.*}} monotonic, align 4 +uint32_t test_swp(uint32_t x, volatile void *p) { + return __swp(x, p); +} |