diff options
author | Spencer Abson <spencer.abson@arm.com> | 2025-01-23 19:48:49 +0000 |
---|---|---|
committer | Christophe Lyon <christophe.lyon@arm.com> | 2025-04-22 17:07:39 +0200 |
commit | fabbf99fc8ae26f0d87ff0c8619b4fc3bb4a5da2 (patch) | |
tree | 1d561a9ea2d030dbb40f6909a132e25b96a7c430 | |
parent | 5d5e8e87a42af8c0d962fa16dc9835fb71778250 (diff) | |
download | gcc-fabbf99fc8ae26f0d87ff0c8619b4fc3bb4a5da2.zip gcc-fabbf99fc8ae26f0d87ff0c8619b4fc3bb4a5da2.tar.gz gcc-fabbf99fc8ae26f0d87ff0c8619b4fc3bb4a5da2.tar.bz2 |
AArch64: Define the spaceship optab [PR117013]
This expansion ensures that exactly one comparison is emitted for
spacesip-like sequences on floating-point operands, including when
the result of such sequences are compared against members of
std::<some_ordering>::<some_value>.
For both integer and floating-point types, we optimize for the case
in which the result of a spaceship-like operation is written to a GPR.
The PR highlights this issue for floating-point operands, but we also
make an improvement for integers, preferring:
cmp w0, w1
cset w1, gt
csinv w0, w1, wzr, ge
over:
cmp w0, w1
mov w0, 1
csinv w0, w0, wzr, ge
csel w0, w0, wzr, ne
to compute:
auto test(int a, int b) { return a <=> b;}
gcc/ChangeLog:
PR target/117013
* config/aarch64/aarch64-protos.h (aarch64_expand_fp_spaceship):
Declare optab expander function for floating-point types.
* config/aarch64/aarch64.cc (aarch64_expand_fp_spaceship):
Define optab expansion for floating-point types (new function).
* config/aarch64/aarch64.md (spaceship<mode>4):
Add define_expands for spaceship<mode>4 on integer and
floating-point types.
gcc/testsuite/ChangeLog:
PR target/117013
* g++.target/aarch64/spaceship_1.C: New test.
* g++.target/aarch64/spaceship_2.C: New test.
* g++.target/aarch64/spaceship_3.C: New test.
-rw-r--r-- | gcc/config/aarch64/aarch64-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.cc | 73 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.md | 43 | ||||
-rw-r--r-- | gcc/testsuite/g++.target/aarch64/spaceship_1.C | 192 | ||||
-rw-r--r-- | gcc/testsuite/g++.target/aarch64/spaceship_2.C | 72 | ||||
-rw-r--r-- | gcc/testsuite/g++.target/aarch64/spaceship_3.C | 9 |
6 files changed, 390 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 8f44aea..1ca86c9 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1260,6 +1260,7 @@ void aarch64_restore_za (rtx); void aarch64_expand_crc_using_pmull (scalar_mode, scalar_mode, rtx *); void aarch64_expand_reversed_crc_using_pmull (scalar_mode, scalar_mode, rtx *); +void aarch64_expand_fp_spaceship (rtx, rtx, rtx, rtx); extern bool aarch64_gcs_enabled (); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 433ec97..38c112c 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -31294,6 +31294,79 @@ aarch64_expand_reversed_crc_using_pmull (scalar_mode crc_mode, } } +/* Expand the spaceship optab for floating-point operands. + + If the result is compared against (-1, 0, 1 , 2), expand into + fcmpe + conditional branch insns. + + Otherwise (the result is just stored as an integer), expand into + fcmpe + a sequence of conditional select/increment/invert insns. */ +void +aarch64_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx hint) +{ + rtx cc_reg = gen_rtx_REG (CCFPEmode, CC_REGNUM); + emit_set_insn (cc_reg, gen_rtx_COMPARE (CCFPEmode, op0, op1)); + + rtx cc_gt = gen_rtx_GT (VOIDmode, cc_reg, const0_rtx); + rtx cc_lt = gen_rtx_LT (VOIDmode, cc_reg, const0_rtx); + rtx cc_un = gen_rtx_UNORDERED (VOIDmode, cc_reg, const0_rtx); + + if (hint == const0_rtx) + { + rtx un_label = gen_label_rtx (); + rtx lt_label = gen_label_rtx (); + rtx gt_label = gen_label_rtx (); + rtx end_label = gen_label_rtx (); + + rtx temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_un, + gen_rtx_LABEL_REF (Pmode, un_label), pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, temp)); + + temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_lt, + gen_rtx_LABEL_REF (Pmode, lt_label), pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, temp)); + + temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_gt, + gen_rtx_LABEL_REF (Pmode, gt_label), pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, temp)); + + /* Equality. */ + emit_move_insn (dest, const0_rtx); + emit_jump (end_label); + + emit_label (un_label); + emit_move_insn (dest, const2_rtx); + emit_jump (end_label); + + emit_label (gt_label); + emit_move_insn (dest, const1_rtx); + emit_jump (end_label); + + emit_label (lt_label); + emit_move_insn (dest, constm1_rtx); + + emit_label (end_label); + } + else + { + rtx temp0 = gen_reg_rtx (SImode); + rtx temp1 = gen_reg_rtx (SImode); + rtx cc_ungt = gen_rtx_UNGT (VOIDmode, cc_reg, const0_rtx); + + /* The value of hint is stored if the operands are unordered. */ + rtx temp_un = gen_int_mode (UINTVAL (hint) - 1, SImode); + if (!aarch64_reg_zero_or_m1_or_1 (temp_un, SImode)) + temp_un = force_reg (SImode, temp_un); + + emit_set_insn (temp0, gen_rtx_IF_THEN_ELSE (SImode, cc_lt, + constm1_rtx, const0_rtx)); + emit_set_insn (temp1, gen_rtx_IF_THEN_ELSE (SImode, cc_un, + temp_un, const0_rtx)); + emit_set_insn (dest, gen_rtx_IF_THEN_ELSE (SImode, cc_ungt, + gen_rtx_PLUS (SImode, temp1, const1_rtx), temp0)); + } +} + /* Target-specific selftests. */ #if CHECKING_P diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 031e621..10fdeec 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4392,6 +4392,49 @@ [(set_attr "type" "alus_ext")] ) +;; <=> operator pattern (integer) +;; (a == b) ? 0 : (a < b) ? -1 : 1. +(define_expand "spaceship<mode>4" + [(match_operand:SI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand:SI 3 "const_int_operand")] + "" + { + // 1 indicates unsigned comparison, -1 indicates signed. + gcc_assert (operands[3] == constm1_rtx || operands[3] == const1_rtx); + + rtx cc_reg = aarch64_gen_compare_reg (EQ, operands[1], operands[2]); + RTX_CODE code_gt = operands[3] == const1_rtx ? GTU : GT; + RTX_CODE code_lt = operands[3] == const1_rtx ? LTU : LT; + + rtx cc_gt = gen_rtx_fmt_ee (code_gt, VOIDmode, cc_reg, const0_rtx); + rtx cc_lt = gen_rtx_fmt_ee (code_lt, VOIDmode, cc_reg, const0_rtx); + + rtx temp = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (temp, gen_rtx_IF_THEN_ELSE (SImode, cc_gt, + const1_rtx, const0_rtx))); + emit_insn (gen_rtx_SET (operands[0], gen_rtx_IF_THEN_ELSE (SImode, cc_lt, + constm1_rtx, temp))); + DONE; + } +) + +;; <=> operator pattern (floating-point) +;; (a == b) ? 0 : (a < b) ? -1 : (a > b) ? 1 : UNORDERED. +(define_expand "spaceship<mode>4" + [(match_operand:SI 0 "register_operand") + (match_operand:GPF 1 "register_operand") + (match_operand:GPF 2 "register_operand") + (match_operand:SI 3 "const_int_operand")] + "TARGET_FLOAT" + { + aarch64_expand_fp_spaceship (operands[0], operands[1], operands[2], + operands[3]); + DONE; + } +) + ;; ------------------------------------------------------------------- ;; Store-flag and conditional select insns ;; ------------------------------------------------------------------- diff --git a/gcc/testsuite/g++.target/aarch64/spaceship_1.C b/gcc/testsuite/g++.target/aarch64/spaceship_1.C new file mode 100644 index 0000000..e6daf62 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/spaceship_1.C @@ -0,0 +1,192 @@ +// PR117013 +/* { dg-do run } */ +/* { dg-options "-O2 -std=c++20 -save-temps -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" ""} } */ + +#include <compare> +#include <stdint.h> + +/* Some implementation-defined value (other than 2) to represent + partial_ordering::unordered (that for libc++ in this case). */ +#define IMP_UN -127 + +#define SPACESHIP_FN(TYPE) \ + [[gnu::noipa]] \ + auto ss_##TYPE (TYPE a, TYPE b) \ + { return a <=> b; } \ + +#define SPACESHIP_FN_NN(TYPE) \ + [[gnu::noipa, gnu::optimize ("-ffinite-math-only")]] \ + auto ss_##TYPE##_no_nans (TYPE a, TYPE b) \ + { return a <=> b; } \ + +/* <=> implementation for floating-point operands. */ +#define SPACESHIP_FP_IDIOM(TYPE) \ + [[gnu::noipa]] \ + int ss_##TYPE##_idiom (TYPE a, TYPE b) \ + { return ((a) == (b) ? 0 : (a) < (b) ? -1 : (a) > (b) ? 1 : IMP_UN); } \ + +#define RUN_TEST(TYPE, ARGA, ARGB, EXPECT, SUFF) \ + if (ss_##TYPE##SUFF ((ARGA), (ARGB)) != (EXPECT)) \ + __builtin_abort(); \ + +/* +** _Z8ss_floatff: +** fcmpe s0, s1 +** csinv (w[0-9]+), wzr, wzr, pl +** cset (w[0-9]+), vs +** csinc w0, \1, \2, ls +** ret +*/ +SPACESHIP_FN(float); + +/* +** _Z16ss_float_no_nansff: +** fcmpe s0, s1 +** csinv (w[0-9]+), wzr, wzr, pl +** csinc w0, \1, wzr, ls +** ret +*/ +SPACESHIP_FN_NN(float); + +/* +** _Z9ss_doubledd: +** fcmpe d0, d1 +** csinv (w[0-9]+), wzr, wzr, pl +** cset (w[0-9]+), vs +** csinc w0, \1, \2, ls +** ret +*/ +SPACESHIP_FN(double); + +/* +** _Z17ss_double_no_nansdd: +** fcmpe d0, d1 +** csinv (w[0-9]+), wzr, wzr, pl +** csinc w0, \1, wzr, ls +** ret +*/ +SPACESHIP_FN_NN(double); + +/* +** _Z14ss_float_idiomff: +** fcmpe s0, s1 +** csinv (w[0-9]+), wzr, wzr, pl +** mov (w[0-9]+), -128 +** csel (w[0-9]+), \2, wzr, vs +** csinc w0, \1, \3, ls +** ret +*/ +SPACESHIP_FP_IDIOM(float); + +/* +** _Z15ss_double_idiomdd: +** fcmpe d0, d1 +** csinv (w[0-9]+), wzr, wzr, pl +** mov (w[0-9]+), -128 +** csel (w[0-9]+), \2, wzr, vs +** csinc w0, \1, \3, ls +** ret +*/ +SPACESHIP_FP_IDIOM(double); + +/* +** _Z10ss_int32_tii: +** cmp w0, w1 +** cset (w[0-9]+), gt +** csinv w0, \1, wzr, ge +** ret +*/ +SPACESHIP_FN(int32_t); + +/* +** _Z10ss_int64_tll: +** cmp x0, x1 +** cset (w[0-9]+), gt +** csinv w0, \1, wzr, ge +** ret +*/ +SPACESHIP_FN(int64_t); + +/* +** _Z11ss_uint32_tjj: +** cmp w0, w1 +** cset (w[0-9]+), hi +** csinv w0, \1, wzr, cs +** ret +*/ +SPACESHIP_FN(uint32_t); + +/* +** _Z11ss_uint64_tmm: +** cmp x0, x1 +** cset (w[0-9]+), hi +** csinv w0, \1, wzr, cs +** ret +*/ +SPACESHIP_FN(uint64_t); + + +int +main() +{ + /* Single precision floating point. */ + RUN_TEST (float, -1.0f, 1.0f, std::partial_ordering::less,); + RUN_TEST (float, -1.0f, 1.0f, -1, _idiom); + + RUN_TEST (float, 1.0f, -1.0f, std::partial_ordering::greater,); + RUN_TEST (float, 1.0f, -1.0f, 1, _idiom); + + RUN_TEST (float, -1.0f, -1.0f, std::partial_ordering::equivalent,); + RUN_TEST (float, -1.0f, -1.0f, 0, _idiom); + + RUN_TEST (float, __builtin_nanf(""), 1.0f, std::partial_ordering::unordered,); + RUN_TEST (float, __builtin_nanf(""), 1.0f, IMP_UN, _idiom); + RUN_TEST (float, 1.0f ,__builtin_nanf(""), std::partial_ordering::unordered,); + RUN_TEST (float, 1.0f, __builtin_nanf(""), IMP_UN, _idiom); + + /* No-NaNs. */ + RUN_TEST (float, -1.0f, 1.0f, std::partial_ordering::less, _no_nans); + RUN_TEST (float, 1.0f, -1.0f, std::partial_ordering::greater, _no_nans); + RUN_TEST (float, -1.0f, -1.0f, std::partial_ordering::equivalent, _no_nans); + + /* Double precision floating point. */ + RUN_TEST (double, -1.0f, 1.0f, std::partial_ordering::less,); + RUN_TEST (double, -1.0f, 1.0f, -1, _idiom); + + RUN_TEST (double, 1.0f, -1.0f, std::partial_ordering::greater,); + RUN_TEST (double, 1.0f, -1.0f, 1, _idiom); + + RUN_TEST (double, -1.0f, -1.0f, std::partial_ordering::equivalent,); + RUN_TEST (double, -1.0f, -1.0f, 0, _idiom); + + RUN_TEST (double, __builtin_nanf(""), 1.0f, std::partial_ordering::unordered,); + RUN_TEST (double, __builtin_nanf(""), 1.0f, IMP_UN, _idiom); + RUN_TEST (double, 1.0f, __builtin_nanf(""), std::partial_ordering::unordered,); + RUN_TEST (double, 1.0f, __builtin_nanf(""), IMP_UN, _idiom); + + /* No-NaNs. */ + RUN_TEST (double, -1.0f, 1.0f, std::partial_ordering::less, _no_nans); + RUN_TEST (double, 1.0f, -1.0f, std::partial_ordering::greater, _no_nans); + RUN_TEST (double, -1.0f, -1.0f, std::partial_ordering::equivalent, _no_nans); + + /* Single integer. */ + RUN_TEST (int32_t, -42, 0, std::strong_ordering::less,); + RUN_TEST (int32_t, 0, -42, std::strong_ordering::greater,); + RUN_TEST (int32_t, 42, 42, std::strong_ordering::equal,); + + RUN_TEST (uint32_t, 0, 42, std::strong_ordering::less,); + RUN_TEST (uint32_t, 42, 0, std::strong_ordering::greater,); + RUN_TEST (uint32_t, 42, 42, std::strong_ordering::equal,); + + /* Double integer. */ + RUN_TEST (int64_t, -42, 0, std::strong_ordering::less,); + RUN_TEST (int64_t, 42, 0, std::strong_ordering::greater,); + RUN_TEST (int64_t, 42, 42, std::strong_ordering::equal,); + + RUN_TEST (uint64_t, 0, 42, std::strong_ordering::less,); + RUN_TEST (uint64_t, 42, 0, std::strong_ordering::greater,); + RUN_TEST (uint64_t, 42, 42, std::strong_ordering::equal,); + + return 0; +}
\ No newline at end of file diff --git a/gcc/testsuite/g++.target/aarch64/spaceship_2.C b/gcc/testsuite/g++.target/aarch64/spaceship_2.C new file mode 100644 index 0000000..c1d3900 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/spaceship_2.C @@ -0,0 +1,72 @@ +// PR117013 +/* { dg-do run } */ +// { dg-options "-O2 -std=c++20 -save-temps" } + +#include <compare> + +#ifndef fp_type +#define fp_type float +#endif + +#define TEST_SS_IDIOM(ARGA, ARGB, EXPECT) \ + if (spaceship_idiom ((ARGA), (ARGB)) != (EXPECT)) \ + __builtin_abort(); \ + +#define TEST_BR_ON_SS(ARGA, ARGB, EXPECT) \ + if(branch_on_spaceship ((ARGA), (ARGB)) != (EXPECT)) \ + __builtin_abort(); \ + + +#define RUN_TEST(ARGA, ARGB, EXPECT) \ + TEST_SS_IDIOM(ARGA, ARGB, EXPECT) \ + TEST_BR_ON_SS(ARGA, ARGB, EXPECT) \ + +/* Test when .SPACESHIP prompts the back end to implement <=> with + conditional branches (only applies to floating-point operands). */ + +[[gnu::noipa]] auto +equiv() { return std::partial_ordering::equivalent; } +[[gnu::noipa]] auto +less() { return std::partial_ordering::less; } +[[gnu::noipa]] auto +greater() { return std::partial_ordering::greater; } +[[gnu::noipa]] auto +unordered() { return std::partial_ordering::unordered; } + +auto +spaceship_idiom(fp_type a, fp_type b) +{ + if (a == b) + return equiv(); + if (a < b) + return less(); + if (a > b) + return greater(); + return unordered(); +} + +auto +branch_on_spaceship(fp_type a, fp_type b) +{ + auto res = a <=> b; + if (res == 0) + return equiv(); + else if (res < 0) + return less(); + else if (res > 0) + return greater(); + return unordered(); +} + +int +main() +{ + RUN_TEST (-1.0f, 1.0f, std::partial_ordering::less); + RUN_TEST (1.0f, -1.0f, std::partial_ordering::greater); + RUN_TEST (1.0f, 1.0f, std::partial_ordering::equivalent); + RUN_TEST (1.0f, __builtin_nanf(""), std::partial_ordering::unordered); + RUN_TEST (__builtin_nanf(""), 1.0f, std::partial_ordering::unordered); +} + +/* { dg-final { scan-assembler-not "\tfcmp\t" } } */ +/* { dg-final { scan-assembler-times "\tfcmpe\t" 2 } } */
\ No newline at end of file diff --git a/gcc/testsuite/g++.target/aarch64/spaceship_3.C b/gcc/testsuite/g++.target/aarch64/spaceship_3.C new file mode 100644 index 0000000..f58b084 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/spaceship_3.C @@ -0,0 +1,9 @@ +// PR117013 +/* { dg-do run } */ +// { dg-options "-O2 -std=c++20 -save-temps" } + +#define fp_type double +#include "spaceship_2.C" + +/* { dg-final { scan-assembler-not "\tfcmp\t" } } */ +/* { dg-final { scan-assembler-times "\tfcmpe\t" 2 } } */
\ No newline at end of file |