AArch64: Define the spaceship optab [PR117013]

This expansion ensures that exactly one comparison is emitted for spacesip-like sequences on floating-point operands, including when the result of such sequences are compared against members of std::<some_ordering>::<some_value>. For both integer and floating-point types, we optimize for the case in which the result of a spaceship-like operation is written to a GPR. The PR highlights this issue for floating-point operands, but we also make an improvement for integers, preferring: cmp w0, w1 cset w1, gt csinv w0, w1, wzr, ge over: cmp w0, w1 mov w0, 1 csinv w0, w0, wzr, ge csel w0, w0, wzr, ne to compute: auto test(int a, int b) { return a <=> b;} gcc/ChangeLog: PR target/117013 * config/aarch64/aarch64-protos.h (aarch64_expand_fp_spaceship): Declare optab expander function for floating-point types. * config/aarch64/aarch64.cc (aarch64_expand_fp_spaceship): Define optab expansion for floating-point types (new function). * config/aarch64/aarch64.md (spaceship<mode>4): Add define_expands for spaceship<mode>4 on integer and floating-point types. gcc/testsuite/ChangeLog: PR target/117013 * g++.target/aarch64/spaceship_1.C: New test. * g++.target/aarch64/spaceship_2.C: New test. * g++.target/aarch64/spaceship_3.C: New test.
author: Spencer Abson <spencer.abson@arm.com> 2025-01-23 19:48:49 +0000
committer: Christophe Lyon <christophe.lyon@arm.com> 2025-04-22 17:07:39 +0200
commit: fabbf99fc8ae26f0d87ff0c8619b4fc3bb4a5da2 (patch)
tree: 1d561a9ea2d030dbb40f6909a132e25b96a7c430
parent: 5d5e8e87a42af8c0d962fa16dc9835fb71778250 (diff)
download: gcc-fabbf99fc8ae26f0d87ff0c8619b4fc3bb4a5da2.zip
gcc-fabbf99fc8ae26f0d87ff0c8619b4fc3bb4a5da2.tar.gz
gcc-fabbf99fc8ae26f0d87ff0c8619b4fc3bb4a5da2.tar.bz2
6 files changed, 390 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 8f44aea..1ca86c9 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1260,6 +1260,7 @@ void aarch64_restore_za (rtx);
 void aarch64_expand_crc_using_pmull (scalar_mode, scalar_mode, rtx *);
 void aarch64_expand_reversed_crc_using_pmull (scalar_mode, scalar_mode, rtx *);
 
+void aarch64_expand_fp_spaceship (rtx, rtx, rtx, rtx);
 
 extern bool aarch64_gcs_enabled ();
 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 433ec97..38c112c 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -31294,6 +31294,79 @@ aarch64_expand_reversed_crc_using_pmull (scalar_mode crc_mode,
     }
 }
 
+/* Expand the spaceship optab for floating-point operands.
+
+   If the result is compared against (-1, 0, 1 , 2), expand into
+   fcmpe + conditional branch insns.
+
+   Otherwise (the result is just stored as an integer), expand into
+   fcmpe + a sequence of conditional select/increment/invert insns.  */
+void
+aarch64_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx hint)
+{
+  rtx cc_reg = gen_rtx_REG (CCFPEmode, CC_REGNUM);
+  emit_set_insn (cc_reg, gen_rtx_COMPARE (CCFPEmode, op0, op1));
+
+  rtx cc_gt = gen_rtx_GT (VOIDmode, cc_reg, const0_rtx);
+  rtx cc_lt = gen_rtx_LT (VOIDmode, cc_reg, const0_rtx);
+  rtx cc_un = gen_rtx_UNORDERED (VOIDmode, cc_reg, const0_rtx);
+
+  if (hint == const0_rtx)
+    {
+      rtx un_label = gen_label_rtx ();
+      rtx lt_label = gen_label_rtx ();
+      rtx gt_label = gen_label_rtx ();
+      rtx end_label = gen_label_rtx ();
+
+      rtx temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_un,
+			gen_rtx_LABEL_REF (Pmode, un_label), pc_rtx);
+      aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, temp));
+
+      temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_lt,
+			gen_rtx_LABEL_REF (Pmode, lt_label), pc_rtx);
+      emit_jump_insn (gen_rtx_SET (pc_rtx, temp));
+
+      temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_gt,
+			gen_rtx_LABEL_REF (Pmode, gt_label), pc_rtx);
+      emit_jump_insn (gen_rtx_SET (pc_rtx, temp));
+
+      /* Equality.  */
+      emit_move_insn (dest, const0_rtx);
+      emit_jump (end_label);
+
+      emit_label (un_label);
+      emit_move_insn (dest, const2_rtx);
+      emit_jump (end_label);
+
+      emit_label (gt_label);
+      emit_move_insn (dest, const1_rtx);
+      emit_jump (end_label);
+
+      emit_label (lt_label);
+      emit_move_insn (dest, constm1_rtx);
+
+      emit_label (end_label);
+    }
+  else
+    {
+      rtx temp0 = gen_reg_rtx (SImode);
+      rtx temp1 = gen_reg_rtx (SImode);
+      rtx cc_ungt = gen_rtx_UNGT (VOIDmode, cc_reg, const0_rtx);
+
+      /* The value of hint is stored if the operands are unordered.  */
+      rtx temp_un = gen_int_mode (UINTVAL (hint) - 1, SImode);
+      if (!aarch64_reg_zero_or_m1_or_1 (temp_un, SImode))
+	temp_un = force_reg (SImode, temp_un);
+
+      emit_set_insn (temp0, gen_rtx_IF_THEN_ELSE (SImode, cc_lt,
+			constm1_rtx, const0_rtx));
+      emit_set_insn (temp1, gen_rtx_IF_THEN_ELSE (SImode, cc_un,
+			temp_un, const0_rtx));
+      emit_set_insn (dest, gen_rtx_IF_THEN_ELSE (SImode, cc_ungt,
+			gen_rtx_PLUS (SImode, temp1, const1_rtx), temp0));
+    }
+}
+
 /* Target-specific selftests.  */
 
 #if CHECKING_P
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 031e621..10fdeec 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4392,6 +4392,49 @@
   [(set_attr "type" "alus_ext")]
 )
 
+;; <=> operator pattern (integer)
+;; (a == b) ? 0 : (a < b) ? -1 : 1.
+(define_expand "spaceship<mode>4"
+  [(match_operand:SI  0 "register_operand")
+   (match_operand:GPI 1 "register_operand")
+   (match_operand:GPI 2 "register_operand")
+   (match_operand:SI  3 "const_int_operand")]
+  ""
+  {
+    // 1 indicates unsigned comparison, -1 indicates signed.
+    gcc_assert (operands[3] == constm1_rtx || operands[3] == const1_rtx);
+
+    rtx cc_reg = aarch64_gen_compare_reg (EQ, operands[1], operands[2]);
+    RTX_CODE code_gt = operands[3] == const1_rtx ? GTU : GT;
+    RTX_CODE code_lt = operands[3] == const1_rtx ? LTU : LT;
+
+    rtx cc_gt = gen_rtx_fmt_ee (code_gt, VOIDmode, cc_reg, const0_rtx);
+    rtx cc_lt = gen_rtx_fmt_ee (code_lt, VOIDmode, cc_reg, const0_rtx);
+
+    rtx temp = gen_reg_rtx (SImode);
+    emit_insn (gen_rtx_SET (temp, gen_rtx_IF_THEN_ELSE (SImode, cc_gt,
+						const1_rtx, const0_rtx)));
+    emit_insn (gen_rtx_SET (operands[0], gen_rtx_IF_THEN_ELSE (SImode, cc_lt,
+						constm1_rtx, temp)));
+    DONE;
+  }
+)
+
+;; <=> operator pattern (floating-point)
+;; (a == b) ? 0 : (a < b) ? -1 : (a > b) ? 1 : UNORDERED.
+(define_expand "spaceship<mode>4"
+  [(match_operand:SI  0 "register_operand")
+   (match_operand:GPF 1 "register_operand")
+   (match_operand:GPF 2 "register_operand")
+   (match_operand:SI  3 "const_int_operand")]
+  "TARGET_FLOAT"
+  {
+    aarch64_expand_fp_spaceship (operands[0], operands[1], operands[2],
+				operands[3]);
+    DONE;
+  }
+)
+
 ;; -------------------------------------------------------------------
 ;; Store-flag and conditional select insns
 ;; -------------------------------------------------------------------
diff --git a/gcc/testsuite/g++.target/aarch64/spaceship_1.C b/gcc/testsuite/g++.target/aarch64/spaceship_1.C
new file mode 100644
index 0000000..e6daf62
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/spaceship_1.C
@@ -0,0 +1,192 @@
+// PR117013
+/* { dg-do run } */
+/* { dg-options "-O2 -std=c++20 -save-temps -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" ""} } */
+
+#include <compare>
+#include <stdint.h>
+
+/*  Some implementation-defined value (other than 2) to represent
+    partial_ordering::unordered (that for libc++ in this case). */
+#define IMP_UN  -127
+
+#define SPACESHIP_FN(TYPE)              \
+    [[gnu::noipa]]                      \
+    auto ss_##TYPE (TYPE a, TYPE b)     \
+    { return a <=> b; }                 \
+
+#define SPACESHIP_FN_NN(TYPE)                                \
+    [[gnu::noipa, gnu::optimize ("-ffinite-math-only")]]     \
+    auto ss_##TYPE##_no_nans (TYPE a, TYPE b)                \
+    { return a <=> b; }                                      \
+
+/*  <=> implementation for floating-point operands. */
+#define SPACESHIP_FP_IDIOM(TYPE)                                              \
+    [[gnu::noipa]]                                                            \
+    int ss_##TYPE##_idiom (TYPE a, TYPE b)                                    \
+    { return ((a) == (b) ? 0 : (a) < (b) ? -1 : (a) > (b) ? 1 : IMP_UN); }    \
+
+#define RUN_TEST(TYPE, ARGA, ARGB, EXPECT, SUFF)            \
+    if (ss_##TYPE##SUFF ((ARGA), (ARGB)) != (EXPECT))       \
+        __builtin_abort();                                  \
+
+/*
+** _Z8ss_floatff:
+**	fcmpe	s0, s1
+**	csinv	(w[0-9]+), wzr, wzr, pl
+**	cset	(w[0-9]+), vs
+**	csinc	w0, \1, \2, ls
+**	ret
+*/
+SPACESHIP_FN(float);
+
+/*
+** _Z16ss_float_no_nansff:
+**	fcmpe	s0, s1
+**	csinv	(w[0-9]+), wzr, wzr, pl
+**	csinc	w0, \1, wzr, ls
+**	ret
+*/
+SPACESHIP_FN_NN(float);
+
+/*
+** _Z9ss_doubledd:
+**	fcmpe	d0, d1
+**	csinv	(w[0-9]+), wzr, wzr, pl
+**	cset	(w[0-9]+), vs
+**	csinc	w0, \1, \2, ls
+**	ret
+*/
+SPACESHIP_FN(double);
+
+/*
+** _Z17ss_double_no_nansdd:
+**	fcmpe	d0, d1
+**	csinv	(w[0-9]+), wzr, wzr, pl
+**	csinc	w0, \1, wzr, ls
+**	ret
+*/
+SPACESHIP_FN_NN(double);
+
+/*
+** _Z14ss_float_idiomff:
+**	fcmpe	s0, s1
+**	csinv	(w[0-9]+), wzr, wzr, pl
+**	mov	(w[0-9]+), -128
+**	csel	(w[0-9]+), \2, wzr, vs
+**	csinc	w0, \1, \3, ls
+**	ret
+*/
+SPACESHIP_FP_IDIOM(float);
+
+/*
+** _Z15ss_double_idiomdd:
+**	fcmpe	d0, d1
+**	csinv	(w[0-9]+), wzr, wzr, pl
+**	mov	(w[0-9]+), -128
+**	csel	(w[0-9]+), \2, wzr, vs
+**	csinc	w0, \1, \3, ls
+**	ret
+*/
+SPACESHIP_FP_IDIOM(double);
+
+/*
+** _Z10ss_int32_tii:
+**	cmp	w0, w1
+**	cset	(w[0-9]+), gt
+**	csinv	w0, \1, wzr, ge
+**	ret
+*/
+SPACESHIP_FN(int32_t);
+
+/*
+** _Z10ss_int64_tll:
+**	cmp	x0, x1
+**	cset	(w[0-9]+), gt
+**	csinv	w0, \1, wzr, ge
+**	ret
+*/
+SPACESHIP_FN(int64_t);
+
+/*
+** _Z11ss_uint32_tjj:
+**	cmp	w0, w1
+**	cset	(w[0-9]+), hi
+**	csinv	w0, \1, wzr, cs
+**	ret
+*/
+SPACESHIP_FN(uint32_t);
+
+/*
+** _Z11ss_uint64_tmm:
+**	cmp	x0, x1
+**	cset	(w[0-9]+), hi
+**	csinv	w0, \1, wzr, cs
+**	ret
+*/
+SPACESHIP_FN(uint64_t);
+
+
+int
+main()
+{
+    /* Single precision floating point.  */
+    RUN_TEST (float, -1.0f, 1.0f, std::partial_ordering::less,);
+    RUN_TEST (float, -1.0f, 1.0f, -1, _idiom);
+
+    RUN_TEST (float, 1.0f, -1.0f, std::partial_ordering::greater,);
+    RUN_TEST (float, 1.0f, -1.0f, 1, _idiom);
+
+    RUN_TEST (float, -1.0f, -1.0f, std::partial_ordering::equivalent,);
+    RUN_TEST (float, -1.0f, -1.0f, 0, _idiom);
+
+    RUN_TEST (float, __builtin_nanf(""), 1.0f, std::partial_ordering::unordered,);
+    RUN_TEST (float, __builtin_nanf(""), 1.0f, IMP_UN, _idiom);
+    RUN_TEST (float, 1.0f ,__builtin_nanf(""), std::partial_ordering::unordered,);
+    RUN_TEST (float, 1.0f, __builtin_nanf(""), IMP_UN, _idiom);
+
+    /* No-NaNs.  */
+    RUN_TEST (float, -1.0f, 1.0f, std::partial_ordering::less, _no_nans);
+    RUN_TEST (float, 1.0f, -1.0f, std::partial_ordering::greater, _no_nans);
+    RUN_TEST (float, -1.0f, -1.0f, std::partial_ordering::equivalent, _no_nans);
+
+    /* Double precision floating point.  */
+    RUN_TEST (double, -1.0f, 1.0f, std::partial_ordering::less,);
+    RUN_TEST (double, -1.0f, 1.0f, -1, _idiom);
+
+    RUN_TEST (double, 1.0f, -1.0f, std::partial_ordering::greater,);
+    RUN_TEST (double, 1.0f, -1.0f, 1, _idiom);
+
+    RUN_TEST (double, -1.0f, -1.0f, std::partial_ordering::equivalent,);
+    RUN_TEST (double, -1.0f, -1.0f, 0, _idiom);
+
+    RUN_TEST (double, __builtin_nanf(""), 1.0f, std::partial_ordering::unordered,);
+    RUN_TEST (double, __builtin_nanf(""), 1.0f, IMP_UN, _idiom);
+    RUN_TEST (double, 1.0f, __builtin_nanf(""), std::partial_ordering::unordered,);
+    RUN_TEST (double, 1.0f, __builtin_nanf(""), IMP_UN, _idiom);
+
+    /* No-NaNs.  */
+    RUN_TEST (double, -1.0f, 1.0f, std::partial_ordering::less, _no_nans);
+    RUN_TEST (double, 1.0f, -1.0f, std::partial_ordering::greater, _no_nans);
+    RUN_TEST (double, -1.0f, -1.0f, std::partial_ordering::equivalent, _no_nans);
+
+    /* Single integer.  */
+    RUN_TEST (int32_t, -42, 0, std::strong_ordering::less,);
+    RUN_TEST (int32_t, 0, -42, std::strong_ordering::greater,);
+    RUN_TEST (int32_t, 42, 42, std::strong_ordering::equal,);
+
+    RUN_TEST (uint32_t, 0, 42, std::strong_ordering::less,);
+    RUN_TEST (uint32_t, 42, 0, std::strong_ordering::greater,);
+    RUN_TEST (uint32_t, 42, 42, std::strong_ordering::equal,);
+
+    /* Double integer.  */
+    RUN_TEST (int64_t, -42, 0, std::strong_ordering::less,);
+    RUN_TEST (int64_t, 42, 0, std::strong_ordering::greater,);
+    RUN_TEST (int64_t, 42, 42, std::strong_ordering::equal,);
+
+    RUN_TEST (uint64_t, 0, 42, std::strong_ordering::less,);
+    RUN_TEST (uint64_t, 42, 0, std::strong_ordering::greater,);
+    RUN_TEST (uint64_t, 42, 42, std::strong_ordering::equal,);
+
+    return 0;
+}
+\ No newline at end of file
diff --git a/gcc/testsuite/g++.target/aarch64/spaceship_2.C b/gcc/testsuite/g++.target/aarch64/spaceship_2.C
new file mode 100644
index 0000000..c1d3900
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/spaceship_2.C
@@ -0,0 +1,72 @@
+// PR117013
+/* { dg-do run } */
+// { dg-options "-O2 -std=c++20 -save-temps" }
+
+#include <compare>
+
+#ifndef fp_type
+#define fp_type float
+#endif
+
+#define TEST_SS_IDIOM(ARGA, ARGB, EXPECT)                   \
+    if (spaceship_idiom ((ARGA), (ARGB)) != (EXPECT))       \
+        __builtin_abort();                                  \
+
+#define TEST_BR_ON_SS(ARGA, ARGB, EXPECT)                   \
+    if(branch_on_spaceship ((ARGA), (ARGB)) != (EXPECT))    \
+        __builtin_abort();                                  \
+
+
+#define RUN_TEST(ARGA, ARGB, EXPECT)                        \
+    TEST_SS_IDIOM(ARGA, ARGB, EXPECT)                       \
+    TEST_BR_ON_SS(ARGA, ARGB, EXPECT)                       \
+
+/*  Test when .SPACESHIP prompts the back end to implement <=> with
+    conditional branches (only applies to floating-point operands).  */
+
+[[gnu::noipa]] auto
+equiv() { return std::partial_ordering::equivalent; }
+[[gnu::noipa]] auto
+less() { return std::partial_ordering::less; }
+[[gnu::noipa]] auto
+greater() { return std::partial_ordering::greater; }
+[[gnu::noipa]] auto
+unordered() { return std::partial_ordering::unordered; }
+
+auto
+spaceship_idiom(fp_type a, fp_type b)
+{
+    if (a == b)
+        return equiv();
+    if (a < b)
+        return less();
+    if (a > b)
+        return greater();
+    return unordered();
+}
+
+auto
+branch_on_spaceship(fp_type a, fp_type b)
+{
+    auto res = a <=> b;
+    if (res == 0)
+        return equiv();
+    else if (res < 0)
+        return less();
+    else if (res > 0)
+        return greater();
+    return unordered();
+}
+
+int
+main()
+{
+    RUN_TEST (-1.0f, 1.0f, std::partial_ordering::less);
+    RUN_TEST (1.0f, -1.0f, std::partial_ordering::greater);
+    RUN_TEST (1.0f, 1.0f, std::partial_ordering::equivalent);
+    RUN_TEST (1.0f, __builtin_nanf(""), std::partial_ordering::unordered);
+    RUN_TEST (__builtin_nanf(""), 1.0f, std::partial_ordering::unordered);
+}
+
+/* { dg-final { scan-assembler-not "\tfcmp\t" } } */
+/* { dg-final { scan-assembler-times "\tfcmpe\t" 2 } } */
+\ No newline at end of file
diff --git a/gcc/testsuite/g++.target/aarch64/spaceship_3.C b/gcc/testsuite/g++.target/aarch64/spaceship_3.C
new file mode 100644
index 0000000..f58b084
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/spaceship_3.C
@@ -0,0 +1,9 @@
+// PR117013
+/* { dg-do run } */
+// { dg-options "-O2 -std=c++20 -save-temps" }
+
+#define fp_type double
+#include "spaceship_2.C"
+
+/* { dg-final { scan-assembler-not "\tfcmp\t" } } */
+/* { dg-final { scan-assembler-times "\tfcmpe\t" 2 } } */
+\ No newline at end of file
author	Spencer Abson <spencer.abson@arm.com>	2025-01-23 19:48:49 +0000
committer	Christophe Lyon <christophe.lyon@arm.com>	2025-04-22 17:07:39 +0200
commit	fabbf99fc8ae26f0d87ff0c8619b4fc3bb4a5da2 (patch)
tree	1d561a9ea2d030dbb40f6909a132e25b96a7c430
parent	5d5e8e87a42af8c0d962fa16dc9835fb71778250 (diff)
download	gcc-fabbf99fc8ae26f0d87ff0c8619b4fc3bb4a5da2.zip gcc-fabbf99fc8ae26f0d87ff0c8619b4fc3bb4a5da2.tar.gz gcc-fabbf99fc8ae26f0d87ff0c8619b4fc3bb4a5da2.tar.bz2