rs6000: Adjust FLOAT128 signbit2 expander for P8 LE [PR114567]

As the associated test case shows, the assembly generated for signbit is sub-optimal for a _Float128 argument loaded from memory on P8 LE. On P8 LE, the p8swap pass puts an explicit AND -16 on the memory address, which causes mode_dependent_address_p to consider it invalid to change the mode of the memory reference, so combine fails to make use of the existing pattern signbit<SIGNBIT:mode>2_dm_mem. Considering that it is always more efficient to use an 8-byte load and shift on P8 LE, this patch adjusts the current expander to treat this case specially. PR target/114567 gcc/ChangeLog: * config/rs6000/rs6000.md (expander signbit<FLOAT128:mode>2): Adjust. (*signbit<mode>2_dm_mem): Rename to ... (signbit<mode>2_dm_mem): ... this. gcc/testsuite/ChangeLog: * gcc.target/powerpc/pr114567.c: New test.
author: Kewen Lin <linkw@linux.ibm.com> 2024-11-21 07:41:34 +0000
committer: Kewen Lin <linkw@gcc.gnu.org> 2024-11-21 07:41:34 +0000
commit: 10e702789eeabcc88451e34c2a5c7dccb96190a5 (patch)
tree: f21fdfc248078586826b1fb0f8a85a6d7fb740f6
parent: baf536754f615c808f02592b765cdd900f240359 (diff)
download: gcc-10e702789eeabcc88451e34c2a5c7dccb96190a5.zip
gcc-10e702789eeabcc88451e34c2a5c7dccb96190a5.tar.gz
gcc-10e702789eeabcc88451e34c2a5c7dccb96190a5.tar.bz2
2 files changed, 35 insertions, 4 deletions
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index ca91a24..95be36d 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5287,7 +5287,7 @@
 ;; when little-endian.
 (define_expand "signbit<mode>2"
   [(set (match_dup 2)
-	(float_truncate:DF (match_operand:FLOAT128 1 "gpc_reg_operand")))
+	(float_truncate:DF (match_operand:FLOAT128 1 "reg_or_mem_operand")))
    (set (match_dup 3)
    	(subreg:DI (match_dup 2) 0))
    (set (match_dup 4)
@@ -5303,12 +5303,26 @@
       rtx dest = operands[0];
       rtx src = operands[1];
       rtx tmp = gen_reg_rtx (DImode);
+      /* For P8 LE, we generate memory access with subreg:V1TI which
+         prevents the related gen_signbitkf2_dm_mem being matched so
+         directly emit it here and leave the other cases alone.  */
+      if (!BYTES_BIG_ENDIAN
+          && !TARGET_P9_VECTOR
+          && memory_operand (src, <MODE>mode))
+        emit_insn (gen_signbitkf2_dm_mem (tmp, src));
+      else
+        {
+          if (!gpc_reg_operand (src, <MODE>mode))
+            src = copy_to_mode_reg (<MODE>mode, src);
+          gcc_assert (gpc_reg_operand (src, <MODE>mode));
+          emit_insn (gen_signbit2_dm (<MODE>mode, tmp, src));
+        }
       rtx dest_di = gen_lowpart (DImode, dest);
-
-      emit_insn (gen_signbit2_dm (<MODE>mode, tmp, src));
       emit_insn (gen_lshrdi3 (dest_di, tmp, GEN_INT (63)));
       DONE;
     }
+  if (!gpc_reg_operand (operands[1], <MODE>mode))
+    operands[1] = copy_to_mode_reg (<MODE>mode, operands[1]);
   operands[2] = gen_reg_rtx (DFmode);
   operands[3] = gen_reg_rtx (DImode);
   if (TARGET_POWERPC64)
@@ -5354,7 +5368,7 @@
 ;; Optimize IEEE 128-bit signbit on to avoid loading the value into a vector
 ;; register and then doing a direct move if the value comes from memory.  On
 ;; little endian, we have to load the 2nd double-word to get the sign bit.
-(define_insn_and_split "*signbit<mode>2_dm_mem"
+(define_insn_and_split "signbit<mode>2_dm_mem"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=b")
 	(unspec:DI [(match_operand:SIGNBIT 1 "memory_operand" "m")]
 		   UNSPEC_SIGNBIT))]
diff --git a/gcc/testsuite/gcc.target/powerpc/pr114567.c b/gcc/testsuite/gcc.target/powerpc/pr114567.c
new file mode 100644
index 0000000..b904387
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr114567.c
@@ -0,0 +1,17 @@
+/* { dg-options "-O2 -mabi=ibmlongdouble -Wno-psabi" } */
+/* { dg-additional-options "-mdejagnu-cpu=power8" { target { ! has_arch_pwr8 } } } */
+/* { dg-require-effective-target powerpc_vsx } */
+/* { dg-require-effective-target float128 } */
+
+/* Verify there is no lxv.*x? and mfvsrd (vector load and move).  */
+
+int
+sbm (_Float128 *a)
+{
+  return __builtin_signbit (*a);
+}
+
+/* { dg-final { scan-assembler-times {\ml(d|wz)\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mlxv\M} } } */
+/* { dg-final { scan-assembler-not {\mlxvd2x\M} } } */
+/* { dg-final { scan-assembler-not {\mmfvsrd\M} } } */
author	Kewen Lin <linkw@linux.ibm.com>	2024-11-21 07:41:34 +0000
committer	Kewen Lin <linkw@gcc.gnu.org>	2024-11-21 07:41:34 +0000
commit	10e702789eeabcc88451e34c2a5c7dccb96190a5 (patch)
tree	f21fdfc248078586826b1fb0f8a85a6d7fb740f6
parent	baf536754f615c808f02592b765cdd900f240359 (diff)
download	gcc-10e702789eeabcc88451e34c2a5c7dccb96190a5.zip gcc-10e702789eeabcc88451e34c2a5c7dccb96190a5.tar.gz gcc-10e702789eeabcc88451e34c2a5c7dccb96190a5.tar.bz2