re PR target/83862 (powerpc: ICE in signbit testcase)

[gcc] 2018-01-22 Michael Meissner <meissner@linux.vnet.ibm.com> PR target/83862 * config/rs6000/rs6000-protos.h (rs6000_split_signbit): Delete, no longer used. * config/rs6000/rs6000.c (rs6000_split_signbit): Likewise. * config/rs6000/rs6000.md (signbit<mode>2): Change code for IEEE 128-bit to produce an UNSPEC move to get the double word with the signbit and then a shift directly to do signbit. (signbit<mode>2_dm): Replace old IEEE 128-bit signbit implementation with a new version that just does either a direct move or a regular move. Move memory interface to separate insns. Move insns so they are next to the expander. (signbit<mode>2_dm_mem_be): New combiner insns to combine load with signbit move. Split big and little endian case. (signbit<mode>2_dm_mem_le): Likewise. (signbit<mode>2_dm_<su>ext): Delete, no longer used. (signbit<mode>2_dm2): Likewise. [gcc/testsuite] 2018-01-22 Michael Meissner <meissner@linux.vnet.ibm.com> PR target/83862 * gcc.target/powerpc/pr83862.c: New test. From-SVN: r256959
author: Michael Meissner <meissner@linux.vnet.ibm.com> 2018-01-22 19:36:18 +0000
committer: Michael Meissner <meissner@gcc.gnu.org> 2018-01-22 19:36:18 +0000
commit: de0ecff83d8639cfa0075fab7a5f9a42657dd94e (patch)
tree: 95c285569c28af1b5a5b74b11206c8095f905878
parent: bc8b0d04284de4288cae4e4ab3bc2d6c36d36245 (diff)
download: gcc-de0ecff83d8639cfa0075fab7a5f9a42657dd94e.zip
gcc-de0ecff83d8639cfa0075fab7a5f9a42657dd94e.tar.gz
gcc-de0ecff83d8639cfa0075fab7a5f9a42657dd94e.tar.bz2
6 files changed, 127 insertions, 94 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 86a19bc..bfcf4da 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,22 @@
+2018-01-22  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	PR target/83862
+	* config/rs6000/rs6000-protos.h (rs6000_split_signbit): Delete,
+	no longer used.
+	* config/rs6000/rs6000.c (rs6000_split_signbit): Likewise.
+	* config/rs6000/rs6000.md (signbit<mode>2): Change code for IEEE
+	128-bit to produce an UNSPEC move to get the double word with the
+	signbit and then a shift directly to do signbit.
+	(signbit<mode>2_dm): Replace old IEEE 128-bit signbit
+	implementation with a new version that just does either a direct
+	move or a regular move.  Move memory interface to separate insns.
+	Move insns so they are next to the expander.
+	(signbit<mode>2_dm_mem_be): New combiner insns to combine load
+	with signbit move.  Split big and little endian case.
+	(signbit<mode>2_dm_mem_le): Likewise.
+	(signbit<mode>2_dm_<su>ext): Delete, no longer used.
+	(signbit<mode>2_dm2): Likewise.
+
 2018-01-22  Sebastian Perta  <sebastian.perta@renesas.com>
 
 	* config/rl78/rl78.md: New define_expand "anddi3".
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 9c6c9a6..3cb5ee8 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -132,7 +132,6 @@ extern int rs6000_emit_cmove (rtx, rtx, rtx, rtx);
 extern int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
 extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx);
 extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx);
-extern void rs6000_split_signbit (rtx, rtx);
 extern void rs6000_expand_atomic_compare_and_swap (rtx op[]);
 extern rtx swap_endian_selector_for_mode (machine_mode mode);
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 47e07cf..b457b2a 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -23424,49 +23424,6 @@ rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
     emit_move_insn (dest, target);
 }
 
-/* Split a signbit operation on 64-bit machines with direct move.  Also allow
-   for the value to come from memory or if it is already loaded into a GPR.  */
-
-void
-rs6000_split_signbit (rtx dest, rtx src)
-{
-  machine_mode d_mode = GET_MODE (dest);
-  machine_mode s_mode = GET_MODE (src);
-  rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
-  rtx shift_reg = dest_di;
-
-  gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64);
-
-  if (MEM_P (src))
-    {
-      rtx mem = (WORDS_BIG_ENDIAN
-		 ? adjust_address (src, DImode, 0)
-		 : adjust_address (src, DImode, 8));
-      emit_insn (gen_rtx_SET (dest_di, mem));
-    }
-
-  else
-    {
-      unsigned int r = reg_or_subregno (src);
-
-      if (INT_REGNO_P (r))
-	shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0));
-
-      else
-	{
-	  /* Generate the special mfvsrd instruction to get it in a GPR.  */
-	  gcc_assert (VSX_REGNO_P (r));
-	  if (s_mode == KFmode)
-	    emit_insn (gen_signbitkf2_dm2 (dest_di, src));
-	  else
-	    emit_insn (gen_signbittf2_dm2 (dest_di, src));
-	}
-    }
-
-  emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
-  return;
-}
-
 /* A subroutine of the atomic operation splitters.  Jump to LABEL if
    COND is true.  Mark the jump as unlikely to be taken.  */
 
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 757ec38..3707566 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -4779,12 +4779,19 @@
 {
   if (FLOAT128_IEEE_P (<MODE>mode))
     {
+      rtx dest = operands[0];
+      rtx src = operands[1];
+      rtx tmp = gen_reg_rtx (DImode);
+      rtx dest_di = gen_lowpart (DImode, dest);
+
       if (<MODE>mode == KFmode)
-	emit_insn (gen_signbitkf2_dm (operands[0], operands[1]));
+	emit_insn (gen_signbitkf2_dm (tmp, src));
       else if (<MODE>mode == TFmode)
-	emit_insn (gen_signbittf2_dm (operands[0], operands[1]));
+	emit_insn (gen_signbittf2_dm (tmp, src));
       else
 	gcc_unreachable ();
+
+      emit_insn (gen_lshrdi3 (dest_di, tmp, GEN_INT (63)));
       DONE;
     }
   operands[2] = gen_reg_rtx (DFmode);
@@ -4805,6 +4812,66 @@
     }
 })
 
+;; Optimize IEEE 128-bit signbit on 64-bit systems with direct move to avoid
+;; multiple direct moves.  If we used a SUBREG:DI of the Floa128 type, the
+;; register allocator would typically move the entire _Float128 item to GPRs (2
+;; instructions on ISA 3.0, 3-4 instructions on ISA 2.07).
+;;
+;; After register allocation, if the _Float128 had originally been in GPRs, the
+;; split allows the post reload phases to eliminate the move, and do the shift
+;; directly with the register that contains the signbit.
+(define_insn_and_split "signbit<mode>2_dm"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
+	(unspec:DI [(match_operand:SIGNBIT 1 "gpc_reg_operand" "wa,r")]
+		   UNSPEC_SIGNBIT))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "@
+   mfvsrd %0,%x1
+   #"
+  "&& reload_completed && int_reg_operand (operands[1], <MODE>mode)"
+  [(set (match_dup 0)
+	(match_dup 2))]
+{
+  operands[2] = gen_highpart (DImode, operands[1]);
+}
+ [(set_attr "type" "mftgpr,*")])
+
+;; Optimize IEEE 128-bit signbit on to avoid loading the value into a vector
+;; register and then doing a direct move if the value comes from memory.  On
+;; little endian, we have to load the 2nd double-word to get the sign bit.
+(define_insn_and_split "*signbit<mode>2_dm_mem"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=b")
+	(unspec:DI [(match_operand:SIGNBIT 1 "memory_operand" "m")]
+		   UNSPEC_SIGNBIT))]
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(match_dup 2))]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx addr = XEXP (src, 0);
+
+  if (WORDS_BIG_ENDIAN)
+    operands[2] = adjust_address (src, DImode, 0);
+
+  else if (REG_P (addr) || SUBREG_P (addr))
+    operands[2] = adjust_address (src, DImode, 8);
+
+  else if (GET_CODE (addr) == PLUS && REG_P (XEXP (addr, 0))
+	   && CONST_INT_P (XEXP (addr, 1)) && mem_operand_gpr (src, DImode))
+    operands[2] = adjust_address (src, DImode, 8);
+
+  else
+    {
+      rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (DImode) : dest;
+      emit_insn (gen_rtx_SET (tmp, addr));
+      operands[2] = change_address (src, DImode,
+				    gen_rtx_PLUS (DImode, tmp, GEN_INT (8)));
+    }
+})
+
 (define_expand "copysign<mode>3"
   [(set (match_dup 3)
         (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))
@@ -4834,54 +4901,6 @@
    operands[5] = CONST0_RTX (<MODE>mode);
   })
 
-;; Optimize signbit on 64-bit systems with direct move to avoid doing the store
-;; and load.
-(define_insn_and_split "signbit<mode>2_dm"
-  [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r")
-	(unspec:SI
-	 [(match_operand:SIGNBIT 1 "input_operand" "wa,m,r")]
-	 UNSPEC_SIGNBIT))]
-  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
-{
-  rs6000_split_signbit (operands[0], operands[1]);
-  DONE;
-}
- [(set_attr "length" "8,8,4")
-  (set_attr "type" "mftgpr,load,integer")])
-
-(define_insn_and_split "*signbit<mode>2_dm_<su>ext"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r")
-	(any_extend:DI
-	 (unspec:SI
-	  [(match_operand:SIGNBIT 1 "input_operand" "wa,m,r")]
-	  UNSPEC_SIGNBIT)))]
-  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
-{
-  rs6000_split_signbit (operands[0], operands[1]);
-  DONE;
-}
- [(set_attr "length" "8,8,4")
-  (set_attr "type" "mftgpr,load,integer")])
-
-;; TARGET_MODES_TIEABLE_P doesn't allow DImode to be tied with the various
-;; floating point types, which makes normal SUBREG's problematical.  Instead
-;; use a special pattern to avoid using a normal movdi.
-(define_insn "signbit<mode>2_dm2"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
-	(unspec:DI [(match_operand:SIGNBIT 1 "gpc_reg_operand" "wa")
-		    (const_int 0)]
-		   UNSPEC_SIGNBIT))]
-  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
-  "mfvsrd %0,%x1"
- [(set_attr "type" "mftgpr")])
-
-
 ;; Use an unspec rather providing an if-then-else in RTL, to prevent the
 ;; compiler from optimizing -0.0
 (define_insn "copysign<mode>3_fcpsgn"
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 03b9c1a..485079a 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2018-01-22  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	PR target/83862
+	* gcc.target/powerpc/pr83862.c: New test.
+
 2018-01-22  Carl Love  <cel@us.ibm.com>
 	* gcc.target/powerpc/powerpc.exp: Add torture tests for
 	builtins-4-runnable.c, builtins-6-runnable.c,
diff --git a/gcc/testsuite/gcc.target/powerpc/pr83862.c b/gcc/testsuite/gcc.target/powerpc/pr83862.c
new file mode 100644
index 0000000..3cadb57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr83862.c
@@ -0,0 +1,34 @@
+/* PR target/83862.c */
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target ppc_float128_sw } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mpower8-vector -O2 -mabi=ieeelongdouble -Wno-psabi" } */
+
+/* On little endian systems, optimizing signbit of IEEE 128-bit values from
+   memory could abort if the memory address was indexed (reg+reg).  The
+   optimization is only on 64-bit machines with direct move.
+
+   Compile with -g -O2 -mabi=ieeelongdouble -Wno-psabi.  */
+
+#ifndef TYPE
+#define TYPE long double
+#endif
+
+int sbr (TYPE a) { return __builtin_signbit (a); }
+int sbm (TYPE *a) { return __builtin_signbit (*a); }
+int sbo (TYPE *a) { return __builtin_signbit (a[4]); }
+int sbi (TYPE *a, unsigned long n) { return __builtin_signbit (a[n]); }
+void sbs (int *p, TYPE a) { *p = __builtin_signbit (a); }
+
+/* On big endian systems, this will generate 2 LDs and 1 LDX, while on
+   little endian systems, this will generate 3 LDs and an ADD.  */
+
+/* { dg-final { scan-assembler-times {\mldx?\M}    3 } } */
+/* { dg-final { scan-assembler-times {\mmfvsrd\M}  2 } } */
+/* { dg-final { scan-assembler-times {\msrdi\M}    5 } } */
+/* { dg-final { scan-assembler-not   {\mmfvsrld\M}   } } */
+/* { dg-final { scan-assembler-not   {\mstxvx?\M}    } } */
+/* { dg-final { scan-assembler-not   {\mstxvw4x\M}   } } */
+/* { dg-final { scan-assembler-not   {\mstxvd2x\M}   } } */
+/* { dg-final { scan-assembler-not   {\mstvx\M}      } } */
+
author	Michael Meissner <meissner@linux.vnet.ibm.com>	2018-01-22 19:36:18 +0000
committer	Michael Meissner <meissner@gcc.gnu.org>	2018-01-22 19:36:18 +0000
commit	de0ecff83d8639cfa0075fab7a5f9a42657dd94e (patch)
tree	95c285569c28af1b5a5b74b11206c8095f905878
parent	bc8b0d04284de4288cae4e4ab3bc2d6c36d36245 (diff)
download	gcc-de0ecff83d8639cfa0075fab7a5f9a42657dd94e.zip gcc-de0ecff83d8639cfa0075fab7a5f9a42657dd94e.tar.gz gcc-de0ecff83d8639cfa0075fab7a5f9a42657dd94e.tar.bz2