diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2024-01-12 12:29:22 +0000 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@arm.com> | 2024-01-12 12:29:22 +0000 |
commit | d76651d9170099de4d3bc3cbc3811d499018910b (patch) | |
tree | f8baf1bc979718f6a186f0bad0796b28b8408eb8 /gcc | |
parent | 0acb63670bf1058fce00a75bd318c40be3bfa222 (diff) | |
download | gcc-d76651d9170099de4d3bc3cbc3811d499018910b.zip gcc-d76651d9170099de4d3bc3cbc3811d499018910b.tar.gz gcc-d76651d9170099de4d3bc3cbc3811d499018910b.tar.bz2 |
aarch64: Use a separate group for SME builtins [PR112989]
The PR shows that we were registering the same overloaded SVE
builtins twice. This was supposed to be prevented by
function_builder::add_overloaded_function, which uses a map
to detect whether a function of the same name has already been
registered. add_overloaded_function then had some asserts to
check for consistency.
However, the map that add_overloaded_function uses was a member of
function_builder itself. That made sense when there was just one
header file, arm_sve.h, since it meant that the memory could be
reclaimed once arm_sve.h had been processed. But now we have three
header files, and in principle, it's possible for arm_sme.h to include
overloads of things that arm_sve.h also defines. We therefore need
to use a global map instead.
However, doing that meant that the consistency checks in
add_overloaded_function fired as expected, which showed some
latent issues. This preliminary patch deals with those by adding
AARCH64_FL_SME to things that require AARCH64_FL_SME2.
This inconsistency led to another problem: functions were selected
for arm_sme.h over arm_sve.h based on whether they had AARCH64_FL_SME.
So some SME2-only things were actually defined in arm_sve.h, whereas
similar SME things were defined in arm_sme.h.
Choosing based on flags was an early get-started crutch that I forgot
to clean up later :( This patch goes for the more direct approach of
having a separate table of SME builtins, as for arm_neon_sve_bridge.h.
aarch64-sve-builtins-sve2.def contains several intrinsics that are
currently SME-only but that operate entirely on vector registers.
Many of these will be extended to SVE2.1 once SVE2.1 support is added,
so the patch front-loads that by keeping the current division between
aarch64-sve-builtins-sve2.def (whose functions now go in arm_sve.h)
and aarch64-sve-builtins-sme.def (whose functions now go in arm_sme.h).
gcc/
PR target/112989
* config/aarch64/aarch64-sve-builtins.def: Don't include
aarch64-sve-builtins-sme.def.
(DEF_SME_ZA_FUNCTION_GS, DEF_SME_ZA_FUNCTION): Move to...
* config/aarch64/aarch64-sve-builtins-sme.def: ...here.
(DEF_SME_FUNCTION): New macro. Use it and DEF_SME_FUNCTION_GS
instead of DEF_SVE_*. Add AARCH64_FL_SME to anything that
requires AARCH64_FL_SME2.
* config/aarch64/aarch64-sve-builtins-sve2.def: Make same
AARCH64_FL_SME adjustment here.
* config/aarch64/aarch64-sve-builtins.cc (function_groups): Don't
include SME intrinsics.
(sme_function_groups): New array.
(handle_arm_sve_h): Remove check for AARCH64_FL_SME.
(handle_arm_sme_h): Use sme_function_groups instead of function_groups.
gcc/testsuite/
PR target/112989
* gcc.target/aarch64/sve/acle/general-c/clamp_1.c: Remove bogus
error test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-sve-builtins-sme.def | 53 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve-builtins-sve2.def | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve-builtins.cc | 26 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve-builtins.def | 13 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clamp_1.c | 2 |
5 files changed, 55 insertions, 40 deletions
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def index 5109c5e..416df0b 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def @@ -17,16 +17,31 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ +#ifndef DEF_SME_FUNCTION +#define DEF_SME_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ + DEF_SME_FUNCTION_GS (NAME, SHAPE, TYPES, none, PREDS) +#endif + +#ifndef DEF_SME_ZA_FUNCTION_GS +#define DEF_SME_ZA_FUNCTION_GS(NAME, SHAPE, TYPES, GROUP, PREDS) \ + DEF_SME_FUNCTION_GS (NAME, SHAPE, TYPES, GROUP, PREDS) +#endif + +#ifndef DEF_SME_ZA_FUNCTION +#define DEF_SME_ZA_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ + DEF_SME_ZA_FUNCTION_GS (NAME, SHAPE, TYPES, none, PREDS) +#endif + #define REQUIRED_EXTENSIONS 0 -DEF_SVE_FUNCTION (arm_has_sme, bool_inherent, none, none) -DEF_SVE_FUNCTION (arm_in_streaming_mode, bool_inherent, none, none) +DEF_SME_FUNCTION (arm_has_sme, bool_inherent, none, none) +DEF_SME_FUNCTION (arm_in_streaming_mode, bool_inherent, none, none) #undef REQUIRED_EXTENSIONS #define REQUIRED_EXTENSIONS AARCH64_FL_SME -DEF_SVE_FUNCTION (svcntsb, count_inherent, none, none) -DEF_SVE_FUNCTION (svcntsd, count_inherent, none, none) -DEF_SVE_FUNCTION (svcntsh, count_inherent, none, none) -DEF_SVE_FUNCTION (svcntsw, count_inherent, none, none) +DEF_SME_FUNCTION (svcntsb, count_inherent, none, none) +DEF_SME_FUNCTION (svcntsd, count_inherent, none, none) +DEF_SME_FUNCTION (svcntsh, count_inherent, none, none) +DEF_SME_FUNCTION (svcntsw, count_inherent, none, none) DEF_SME_ZA_FUNCTION (svldr, ldr_za, za, none) DEF_SME_ZA_FUNCTION (svstr, str_za, za, none) DEF_SME_ZA_FUNCTION (svundef, inherent_za, za, none) @@ -75,17 +90,17 @@ DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_d_float, za_m) DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_d_float, za_m) #undef REQUIRED_EXTENSIONS -#define REQUIRED_EXTENSIONS AARCH64_FL_SME2 -DEF_SVE_FUNCTION (svldr_zt, ldr_zt, none, none) -DEF_SVE_FUNCTION (svstr_zt, str_zt, none, none) -DEF_SVE_FUNCTION (svzero_zt, inherent_zt, none, none) +#define REQUIRED_EXTENSIONS AARCH64_FL_SME | AARCH64_FL_SME2 +DEF_SME_FUNCTION (svldr_zt, ldr_zt, none, none) +DEF_SME_FUNCTION (svstr_zt, str_zt, none, none) +DEF_SME_FUNCTION (svzero_zt, inherent_zt, none, none) #undef REQUIRED_EXTENSIONS /* The d_za entries in this section just declare C _za64 overloads, which will then be resolved to either an integer function or a floating-point function. They are needed because the integer and floating-point functions have different architecture requirements. */ -#define REQUIRED_EXTENSIONS AARCH64_FL_SME2 | AARCH64_FL_SM_ON +#define REQUIRED_EXTENSIONS AARCH64_FL_SME | AARCH64_FL_SME2 | AARCH64_FL_SM_ON DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_s_data, vg1x24, none) DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, d_za, vg1x24, none) DEF_SME_ZA_FUNCTION_GS (svadd_write, binary_za_slice_opt_single, za_s_integer, @@ -100,9 +115,9 @@ DEF_SME_ZA_FUNCTION_GS (svdot_lane, dot_za_slice_lane, za_s_h_data, vg1x24, none) DEF_SME_ZA_FUNCTION_GS (svdot_lane, dot_za_slice_lane, za_s_b_integer, vg1x24, none) -DEF_SVE_FUNCTION_GS (svluti2_lane_zt, luti2_lane_zt, bhs_data, x124, none) -DEF_SVE_FUNCTION_GS (svluti4_lane_zt, luti4_lane_zt, bhs_data, x12, none) -DEF_SVE_FUNCTION_GS (svluti4_lane_zt, luti4_lane_zt, hs_data, x4, none) +DEF_SME_FUNCTION_GS (svluti2_lane_zt, luti2_lane_zt, bhs_data, x124, none) +DEF_SME_FUNCTION_GS (svluti4_lane_zt, luti4_lane_zt, bhs_data, x12, none) +DEF_SME_FUNCTION_GS (svluti4_lane_zt, luti4_lane_zt, hs_data, x4, none) DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_s_float, vg1x24, none) DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_s_h_data, @@ -157,7 +172,8 @@ DEF_SME_ZA_FUNCTION_GS (svwrite_hor, write_za, za_bhsd_data, vg24, none) DEF_SME_ZA_FUNCTION_GS (svwrite_ver, write_za, za_bhsd_data, vg24, none) #undef REQUIRED_EXTENSIONS -#define REQUIRED_EXTENSIONS (AARCH64_FL_SME2 \ +#define REQUIRED_EXTENSIONS (AARCH64_FL_SME \ + | AARCH64_FL_SME2 \ | AARCH64_FL_SME_I16I64 \ | AARCH64_FL_SM_ON) DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_d_integer, vg1x24, none) @@ -182,7 +198,8 @@ DEF_SME_ZA_FUNCTION_GS (svvdot_lane, dot_za_slice_lane, za_d_h_integer, vg1x4, none) #undef REQUIRED_EXTENSIONS -#define REQUIRED_EXTENSIONS (AARCH64_FL_SME2 \ +#define REQUIRED_EXTENSIONS (AARCH64_FL_SME \ + | AARCH64_FL_SME2 \ | AARCH64_FL_SME_F64F64 \ | AARCH64_FL_SM_ON) DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_d_float, vg1x24, none) @@ -196,3 +213,7 @@ DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_d_float, vg1x24, none) DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_d_float, vg1x24, none) #undef REQUIRED_EXTENSIONS + +#undef DEF_SME_ZA_FUNCTION +#undef DEF_SME_ZA_FUNCTION_GS +#undef DEF_SME_FUNCTION diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def index 89bb134..4366925 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def @@ -241,6 +241,7 @@ DEF_SVE_FUNCTION (svrevd, unary, all_data, mxz) #define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ | AARCH64_FL_SVE2 \ + | AARCH64_FL_SME \ | AARCH64_FL_SME2 \ | AARCH64_FL_SM_ON) DEF_SVE_FUNCTION_GS (svadd, binary_single, all_integer, x24, none) diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index cd8d3bd..3ad2271 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -882,18 +882,15 @@ static const predication_index preds_z[] = { PRED_z, NUM_PREDS }; /* Used by SME instructions that always merge into ZA. */ static const predication_index preds_za_m[] = { PRED_za_m, NUM_PREDS }; -/* A list of all SVE ACLE functions. */ +/* A list of all arm_sve.h functions. */ static CONSTEXPR const function_group_info function_groups[] = { #define DEF_SVE_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \ { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_##GROUPS, \ preds_##PREDS, REQUIRED_EXTENSIONS }, -#define DEF_SME_ZA_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \ - { #NAME, &functions::NAME##_za, &shapes::SHAPE, types_##TYPES, \ - groups_##GROUPS, preds_##PREDS, (REQUIRED_EXTENSIONS | AARCH64_FL_ZA_ON) }, #include "aarch64-sve-builtins.def" }; -/* A list of all NEON-SVE-Bridge ACLE functions. */ +/* A list of all arm_neon_sve_bridge.h ACLE functions. */ static CONSTEXPR const function_group_info neon_sve_function_groups[] = { #define DEF_NEON_SVE_FUNCTION(NAME, SHAPE, TYPES, GROUPS, PREDS) \ { #NAME, &neon_sve_bridge_functions::NAME, &shapes::SHAPE, types_##TYPES, \ @@ -901,6 +898,17 @@ static CONSTEXPR const function_group_info neon_sve_function_groups[] = { #include "aarch64-neon-sve-bridge-builtins.def" }; +/* A list of all arm_sme.h functions. */ +static CONSTEXPR const function_group_info sme_function_groups[] = { +#define DEF_SME_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \ + { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_##GROUPS, \ + preds_##PREDS, REQUIRED_EXTENSIONS }, +#define DEF_SME_ZA_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \ + { #NAME, &functions::NAME##_za, &shapes::SHAPE, types_##TYPES, \ + groups_##GROUPS, preds_##PREDS, (REQUIRED_EXTENSIONS | AARCH64_FL_ZA_ON) }, +#include "aarch64-sve-builtins-sme.def" +}; + /* The scalar type associated with each vector type. */ extern GTY(()) tree scalar_types[NUM_VECTOR_TYPES + 1]; tree scalar_types[NUM_VECTOR_TYPES + 1]; @@ -4629,8 +4637,7 @@ handle_arm_sve_h () function_table = new hash_table<registered_function_hasher> (1023); function_builder builder; for (unsigned int i = 0; i < ARRAY_SIZE (function_groups); ++i) - if (!(function_groups[i].required_extensions & AARCH64_FL_SME)) - builder.register_function_group (function_groups[i]); + builder.register_function_group (function_groups[i]); } /* Implement #pragma GCC aarch64 "arm_neon_sve_bridge.h". */ @@ -4675,9 +4682,8 @@ handle_arm_sme_h () sme_switcher sme; function_builder builder; - for (unsigned int i = 0; i < ARRAY_SIZE (function_groups); ++i) - if (function_groups[i].required_extensions & AARCH64_FL_SME) - builder.register_function_group (function_groups[i]); + for (unsigned int i = 0; i < ARRAY_SIZE (sme_function_groups); ++i) + builder.register_function_group (sme_function_groups[i]); } /* If we're implementing manual overloading, check whether the SVE diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def index 61593b4..a9243c4 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.def +++ b/gcc/config/aarch64/aarch64-sve-builtins.def @@ -51,16 +51,6 @@ DEF_SVE_FUNCTION_GS (NAME, SHAPE, TYPES, none, PREDS) #endif -#ifndef DEF_SME_ZA_FUNCTION_GS -#define DEF_SME_ZA_FUNCTION_GS(NAME, SHAPE, TYPES, GROUP, PREDS) \ - DEF_SVE_FUNCTION_GS(NAME, SHAPE, TYPES, GROUP, PREDS) -#endif - -#ifndef DEF_SME_ZA_FUNCTION -#define DEF_SME_ZA_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ - DEF_SME_ZA_FUNCTION_GS (NAME, SHAPE, TYPES, none, PREDS) -#endif - DEF_SVE_MODE (n, none, none, none) DEF_SVE_MODE (single, none, none, none) DEF_SVE_MODE (index, none, none, elements) @@ -168,11 +158,8 @@ DEF_SVE_GROUP_SUFFIX (vg4x4, 4, 4) #include "aarch64-sve-builtins-base.def" #include "aarch64-sve-builtins-sve2.def" -#include "aarch64-sve-builtins-sme.def" -#undef DEF_SME_ZA_FUNCTION #undef DEF_SVE_FUNCTION -#undef DEF_SME_ZA_FUNCTION_GS #undef DEF_SVE_FUNCTION_GS #undef DEF_SVE_GROUP_SUFFIX #undef DEF_SME_ZA_SUFFIX diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clamp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clamp_1.c index 342bebc..07e22d2 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clamp_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clamp_1.c @@ -13,7 +13,7 @@ f1 (svcount_t pn, svfloat16_t f16, svint16_t s16, svfloat32_t f32, svclamp (f16, f16, f16, f16); /* { dg-error {too many arguments to function 'svclamp'} } */ svclamp (0, f16, f16); /* { dg-error {passing 'int' to argument 1 of 'svclamp', which expects an SVE type rather than a scalar type} } */ svclamp (f16, f16, f16); - svclamp (s16, s16, s16); /* { dg-error {'svclamp' has no form that takes 'svint16_t' arguments} } */ + svclamp (s16, s16, s16); svclamp (pn, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svclamp', but argument 1 had type 'svcount_t'} } */ svclamp (f16, s16, f16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svclamp', but argument 1 had type 'svfloat16_t'} } */ svclamp (f16, f32, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svclamp', but argument 1 had type 'svfloat16_t'} } */ |