From 33de7b37463fd3b846f76e86ed55aaa46870f92e Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Fri, 18 Nov 2022 09:06:38 +0000 Subject: aarch64: Fix up LDAPR codegen Upon some further inspection I realised I had misunderstood some intricacies of the extending loads of the RCPC feature. This patch fixes up the recent GCC support accordingly. In particular: * The sign-extending forms are a form of LDAPURS* and are actually part of FEAT_RCPC2 that is enabled with Armv8.4-a rather than the base Armv8.3-a FEAT_RCPC. The patch introduces a TARGET_RCPC2 macro and gates this combine pattern accordingly. * The assembly output for the zero-extending LDAPR instruction should always use %w formatting for its destination register. The testcase is split into zero-extending and sign-extending parts since they require different architecture pragmas. It's also straightforward to add the rest of the FEAT_RCPC2 codegen (with immediate offset addressing modes) but that can be done as a separate patch. Apologies for not catching this sooner, but it hasn't been in trunk long, so no harm done. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ChangeLog: * config/aarch64/aarch64.h (TARGET_RCPC2): Define. * config/aarch64/atomics.md (*aarch64_atomic_load_rcpc_zext): Adjust output template. (*aarch64_atomic_load_rcpc_sext): Guard on TARGET_RCPC2. Adjust output template. * config/aarch64/iterators.md (w_sx): New mode attr. gcc/testsuite/ChangeLog: * gcc.target/aarch64/ldapr-ext.c: Rename to... * gcc.target/aarch64/ldapr-zext.c: ... This. Fix expected assembly. * gcc.target/aarch64/ldapr-sext.c: New test. 
--- gcc/config/aarch64/aarch64.h | 4 ++ gcc/config/aarch64/atomics.md | 6 +- gcc/config/aarch64/iterators.md | 4 ++ gcc/testsuite/gcc.target/aarch64/ldapr-ext.c | 94 --------------------------- gcc/testsuite/gcc.target/aarch64/ldapr-sext.c | 67 +++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/ldapr-zext.c | 67 +++++++++++++++++++ 6 files changed, 145 insertions(+), 97 deletions(-) delete mode 100644 gcc/testsuite/gcc.target/aarch64/ldapr-ext.c create mode 100644 gcc/testsuite/gcc.target/aarch64/ldapr-sext.c create mode 100644 gcc/testsuite/gcc.target/aarch64/ldapr-zext.c (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 070466d..dcb1ece 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -336,6 +336,10 @@ enum class aarch64_feature : unsigned char { /* RCPC loads from Armv8.3-a. */ #define TARGET_RCPC (AARCH64_ISA_RCPC) +/* The RCPC2 extensions from Armv8.4-a that allow immediate offsets to LDAPR + and sign-extending versions.*/ +#define TARGET_RCPC2 (AARCH64_ISA_RCPC8_4) + /* Apply the workaround for Cortex-A53 erratum 835769. 
*/ #define TARGET_FIX_ERR_A53_835769 \ ((aarch64_fix_a53_err835769 == 2) \ diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md index 1805012..b6eac4e 100644 --- a/gcc/config/aarch64/atomics.md +++ b/gcc/config/aarch64/atomics.md @@ -712,7 +712,7 @@ (match_operand:SI 2 "const_int_operand")] ;; model UNSPECV_LDAP)))] "TARGET_RCPC && (<GPI:sizen> > <ALLX:sizen>)" - "ldapr<ALLX:atomic_sfx>\t%<GPI:w>0, %1" + "ldapr<ALLX:atomic_sfx>\t%w0, %1" ) (define_insn "*aarch64_atomic_load_rcpc_sext" @@ -722,8 +722,8 @@ [(match_operand:ALLX 1 "aarch64_sync_memory_operand" "Q") (match_operand:SI 2 "const_int_operand")] ;; model UNSPECV_LDAP)))] - "TARGET_RCPC && (<GPI:sizen> > <ALLX:sizen>)" - "ldaprs<ALLX:atomic_sfx>\t%<GPI:w>0, %1" + "TARGET_RCPC2 && (<GPI:sizen> > <ALLX:sizen>)" + "ldapurs<ALLX:size>\t%<ALLX:w_sx>0, %1" ) (define_insn "atomic_store" diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 7c7fcbb..a3e4075 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1012,6 +1012,10 @@ ;; 32-bit version and "%x0" in the 64-bit version. (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")]) +;; Similar to w above, but used for sign-extending loads where we want to +;; use %x0 for SImode. +(define_mode_attr w_sx [(QI "w") (HI "w") (SI "x")]) + ;; The size of access, in bytes. (define_mode_attr ldst_sz [(SI "4") (DI "8")]) ;; Likewise for load/store pair. 
diff --git a/gcc/testsuite/gcc.target/aarch64/ldapr-ext.c b/gcc/testsuite/gcc.target/aarch64/ldapr-ext.c deleted file mode 100644 index aed27e0..0000000 --- a/gcc/testsuite/gcc.target/aarch64/ldapr-ext.c +++ /dev/null @@ -1,94 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -std=c99" } */ -/* { dg-final { check-function-bodies "**" "" "" } } */ -#include <stdatomic.h> - -#pragma GCC target "+rcpc" - -atomic_ullong u64; -atomic_llong s64; -atomic_uint u32; -atomic_int s32; -atomic_ushort u16; -atomic_short s16; -atomic_uchar u8; -atomic_schar s8; - -#define TEST(name, ldsize, rettype) \ -rettype \ -test_##name (void) \ -{ \ - return atomic_load_explicit (&ldsize, memory_order_acquire); \ -} - -/* -**test_u8_u64: -**... -** ldaprb x0, \[x[0-9]+\] -** ret -*/ - -TEST(u8_u64, u8, unsigned long long) - -/* -**test_s8_s64: -**... -** ldaprsb x0, \[x[0-9]+\] -** ret -*/ - -TEST(s8_s64, s8, long long) - -/* -**test_u16_u64: -**... -** ldaprh x0, \[x[0-9]+\] -** ret -*/ - -TEST(u16_u64, u16, unsigned long long) - -/* -**test_s16_s64: -**... -** ldaprsh x0, \[x[0-9]+\] -** ret -*/ - -TEST(s16_s64, s16, long long) - -/* -**test_u8_u32: -**... -** ldaprb w0, \[x[0-9]+\] -** ret -*/ - -TEST(u8_u32, u8, unsigned) - -/* -**test_s8_s32: -**... -** ldaprsb w0, \[x[0-9]+\] -** ret -*/ - -TEST(s8_s32, s8, int) - -/* -**test_u16_u32: -**... -** ldaprh w0, \[x[0-9]+\] -** ret -*/ - -TEST(u16_u32, u16, unsigned) - -/* -**test_s16_s32: -**... 
-** ldaprsh w0, \[x[0-9]+\] -** ret -*/ - -TEST(s16_s32, s16, int) diff --git a/gcc/testsuite/gcc.target/aarch64/ldapr-sext.c b/gcc/testsuite/gcc.target/aarch64/ldapr-sext.c new file mode 100644 index 0000000..292c55d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ldapr-sext.c @@ -0,0 +1,67 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -std=c99" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ +#include <stdatomic.h> + +#pragma GCC target "arch=armv8.4-a" + +atomic_ullong u64; +atomic_llong s64; +atomic_uint u32; +atomic_int s32; +atomic_ushort u16; +atomic_short s16; +atomic_uchar u8; +atomic_schar s8; + +#define TEST(name, ldsize, rettype) \ +rettype \ +test_##name (void) \ +{ \ + return atomic_load_explicit (&ldsize, memory_order_acquire); \ +} + +/* +**test_s8_s64: +**... +** ldapursb w0, \[x[0-9]+\] +** ret +*/ + +TEST(s8_s64, s8, long long) + +/* +**test_s16_s64: +**... +** ldapursh w0, \[x[0-9]+\] +** ret +*/ + +TEST(s16_s64, s16, long long) + +/* +**test_s32_s64: +**... +** ldapursw x0, \[x[0-9]+\] +** ret +*/ + +TEST(s32_s64, s32, long long) + +/* +**test_s8_s32: +**... +** ldapursb w0, \[x[0-9]+\] +** ret +*/ + +TEST(s8_s32, s8, int) + +/* +**test_s16_s32: +**... +** ldapursh w0, \[x[0-9]+\] +** ret +*/ + +TEST(s16_s32, s16, int) diff --git a/gcc/testsuite/gcc.target/aarch64/ldapr-zext.c b/gcc/testsuite/gcc.target/aarch64/ldapr-zext.c new file mode 100644 index 0000000..6f448ee --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ldapr-zext.c @@ -0,0 +1,67 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -std=c99" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ +#include <stdatomic.h> + +#pragma GCC target "+rcpc" + +atomic_ullong u64; +atomic_llong s64; +atomic_uint u32; +atomic_int s32; +atomic_ushort u16; +atomic_short s16; +atomic_uchar u8; +atomic_schar s8; + +#define TEST(name, ldsize, rettype) \ +rettype \ +test_##name (void) \ +{ \ + return atomic_load_explicit (&ldsize, memory_order_acquire); \ +} + +/* +**test_u8_u64: +**... 
+** ldaprb w0, \[x[0-9]+\] +** ret +*/ + +TEST(u8_u64, u8, unsigned long long) + +/* +**test_u16_u64: +**... +** ldaprh w0, \[x[0-9]+\] +** ret +*/ + +TEST(u16_u64, u16, unsigned long long) + +/* +**test_u32_u64: +**... +** ldapr w0, \[x[0-9]+\] +** ret +*/ + +TEST(u32_u64, u32, unsigned long long) + +/* +**test_u8_u32: +**... +** ldaprb w0, \[x[0-9]+\] +** ret +*/ + +TEST(u8_u32, u8, unsigned) + +/* +**test_u16_u32: +**... +** ldaprh w0, \[x[0-9]+\] +** ret +*/ +TEST(u16_u32, u16, unsigned) + -- cgit v1.1