AArch64: Handle copysign (x, -1) expansion efficiently

copysign (x, -1) is effectively fneg (abs (x)) which on AArch64 can be most efficiently done by doing an OR of the signbit. The middle-end will optimize fneg (abs (x)) now to copysign as the canonical form and so this optimizes the expansion. If the target has an inclusive-OR that takes an immediate, then the transformed instruction is both shorter and faster. For those that don't, the immediate has to be separately constructed, but this still ends up being faster as the immediate construction is not on the critical path. Note that this is part of another patch series, the additional testcases are mutually dependent on the match.pd patch. As such the tests are added there insteadof here. gcc/ChangeLog: PR tree-optimization/109154 * config/aarch64/aarch64.md (copysign<GPF:mode>3): Handle copysign (x, -1). * config/aarch64/aarch64-simd.md (copysign<mode>3): Likewise. * config/aarch64/aarch64-sve.md (copysign<mode>3): Likewise.
author: Tamar Christina <tamar.christina@arm.com> 2023-11-09 14:04:57 +0000
committer: Tamar Christina <tamar.christina@arm.com> 2023-11-09 14:18:52 +0000
commit: ed2e058c58ab064fe3a26bc4a47a5d0a47350f97 (patch)
tree: 0c807a80402401b3c3b222002239b66e356857b8
parent: ffd40d3b233d63c925cceb0dcd5a4fc8925e2993 (diff)
download: gcc-ed2e058c58ab064fe3a26bc4a47a5d0a47350f97.zip
gcc-ed2e058c58ab064fe3a26bc4a47a5d0a47350f97.tar.gz
gcc-ed2e058c58ab064fe3a26bc4a47a5d0a47350f97.tar.bz2
3 files changed, 57 insertions, 10 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 98c418c..c6f2d58 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -754,15 +754,33 @@
 (define_expand "copysign<mode>3"
   [(match_operand:VHSDF 0 "register_operand")
    (match_operand:VHSDF 1 "register_operand")
-   (match_operand:VHSDF 2 "register_operand")]
+   (match_operand:VHSDF 2 "nonmemory_operand")]
   "TARGET_SIMD"
 {
-  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
+  machine_mode int_mode = <V_INT_EQUIV>mode;
+  rtx v_bitmask = gen_reg_rtx (int_mode);
   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 
   emit_move_insn (v_bitmask,
 		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 						     HOST_WIDE_INT_M1U << bits));
+
+  /* copysign (x, -1) should instead be expanded as orr with the sign
+     bit.  */
+  if (!REG_P (operands[2]))
+    {
+      rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
+      if (GET_CODE (op2_elt) == CONST_DOUBLE
+	  && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
+	{
+	  emit_insn (gen_ior<v_int_equiv>3 (
+	    lowpart_subreg (int_mode, operands[0], <MODE>mode),
+	    lowpart_subreg (int_mode, operands[1], <MODE>mode), v_bitmask));
+	  DONE;
+	}
+    }
+
+  operands[2] = force_reg (<MODE>mode, operands[2]);
   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 					 operands[2], operands[1]));
   DONE;
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 5a652d8..cb07c61 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -6387,7 +6387,7 @@
 (define_expand "copysign<mode>3"
   [(match_operand:SVE_FULL_F 0 "register_operand")
    (match_operand:SVE_FULL_F 1 "register_operand")
-   (match_operand:SVE_FULL_F 2 "register_operand")]
+   (match_operand:SVE_FULL_F 2 "nonmemory_operand")]
   "TARGET_SVE"
   {
     rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
@@ -6398,11 +6398,26 @@
     rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
     rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
 
-    emit_insn (gen_and<v_int_equiv>3
-	       (sign, arg2,
-		aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
-						   HOST_WIDE_INT_M1U
-						   << bits)));
+    rtx v_sign_bitmask
+      = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+					   HOST_WIDE_INT_M1U << bits);
+
+    /* copysign (x, -1) should instead be expanded as orr with the sign
+       bit.  */
+    if (!REG_P (operands[2]))
+      {
+	rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
+	if (GET_CODE (op2_elt) == CONST_DOUBLE
+	    && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
+	  {
+	    emit_insn (gen_ior<v_int_equiv>3 (int_res, arg1, v_sign_bitmask));
+	    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
+	    DONE;
+	  }
+      }
+
+    operands[2] = force_reg (<MODE>mode, operands[2]);
+    emit_insn (gen_and<v_int_equiv>3 (sign, arg2, v_sign_bitmask));
     emit_insn (gen_and<v_int_equiv>3
 	       (mant, arg1,
 		aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c6b1506..7be1de3 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6977,12 +6977,26 @@
 (define_expand "copysign<GPF:mode>3"
   [(match_operand:GPF 0 "register_operand")
    (match_operand:GPF 1 "register_operand")
-   (match_operand:GPF 2 "register_operand")]
+   (match_operand:GPF 2 "nonmemory_operand")]
   "TARGET_SIMD"
 {
-  rtx bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
+  machine_mode int_mode = <V_INT_EQUIV>mode;
+  rtx bitmask = gen_reg_rtx (int_mode);
   emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U
 				    << (GET_MODE_BITSIZE (<MODE>mode) - 1)));
+  /* copysign (x, -1) should instead be expanded as orr with the sign
+     bit.  */
+  rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
+  if (GET_CODE (op2_elt) == CONST_DOUBLE
+      && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
+    {
+      emit_insn (gen_ior<v_int_equiv>3 (
+	lowpart_subreg (int_mode, operands[0], <MODE>mode),
+	lowpart_subreg (int_mode, operands[1], <MODE>mode), bitmask));
+      DONE;
+    }
+
+  operands[2] = force_reg (<MODE>mode, operands[2]);
   emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
 				       bitmask));
   DONE;
author	Tamar Christina <tamar.christina@arm.com>	2023-11-09 14:04:57 +0000
committer	Tamar Christina <tamar.christina@arm.com>	2023-11-09 14:18:52 +0000
commit	ed2e058c58ab064fe3a26bc4a47a5d0a47350f97 (patch)
tree	0c807a80402401b3c3b222002239b66e356857b8
parent	ffd40d3b233d63c925cceb0dcd5a4fc8925e2993 (diff)
download	gcc-ed2e058c58ab064fe3a26bc4a47a5d0a47350f97.zip gcc-ed2e058c58ab064fe3a26bc4a47a5d0a47350f97.tar.gz gcc-ed2e058c58ab064fe3a26bc4a47a5d0a47350f97.tar.bz2