vsx.md (VSX_EXTRACT_FL): New iterator for all binary floating point types supported by the hardware...

[gcc] 2016-10-31 Michael Meissner <meissner@linux.vnet.ibm.com> * config/rs6000/vsx.md (VSX_EXTRACT_FL): New iterator for all binary floating point types supported by the hardware except for double. (vsx_xvcvsxwdp_df): Provide scalar result alternative to the vector instruction for optimizing extracting a SImode from a V4SImode vector and converting it to floating point. (vsx_xvcvuxwdp_df): Likewise. (vsx_extract_si): On ISA 3.0, allow extract target and temporary registers to be any VSX register. Move stores to the end of the constraints. (vsx_extract_si_<uns>float_df): New combiner pattern and splitter to optimize extracting a SImode from a V4SImode vector and converting it to a binary floating point type supported by the hardware. Use the vector converts instead of extracting the element, sign extending it, and then converting it to double. Other floating point types than double first convert to double, then the double is converted to that type. (vsx_extract_si_<uns>float_<mode>): Likewise. [gcc/testsuite] 2016-10-31 Michael Meissner <meissner@linux.vnet.ibm.com> * gcc.target/powerpc/vsx-extract-4.c: New test. * gcc.target/powerpc/vsx-extract-5.c: Likewise. From-SVN: r241731
author: Michael Meissner <meissner@linux.vnet.ibm.com> 2016-11-01 00:41:30 +0000
committer: Michael Meissner <meissner@gcc.gnu.org> 2016-11-01 00:41:30 +0000
commit: 156b5ccae7b1542a23d4d42b0a6cf9672c7b6e7d (patch)
tree: 0b64ac057b25bef1487ad672b70544c9e0daf4ab /gcc
parent: fb4c92aabcbb34c33584d4c60df8d9baed3dca7a (diff)
download: gcc-156b5ccae7b1542a23d4d42b0a6cf9672c7b6e7d.zip
gcc-156b5ccae7b1542a23d4d42b0a6cf9672c7b6e7d.tar.gz
gcc-156b5ccae7b1542a23d4d42b0a6cf9672c7b6e7d.tar.bz2
5 files changed, 312 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d67a20f..7dd239d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,24 @@
+2016-10-31  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* config/rs6000/vsx.md (VSX_EXTRACT_FL): New iterator for all
+	binary floating point types supported by the hardware except for
+	double.
+	(vsx_xvcvsxwdp_df): Provide scalar result alternative to the
+	vector instruction for optimizing extracting a SImode from a
+	V4SImode vector and converting it to floating point.
+	(vsx_xvcvuxwdp_df): Likewise.
+	(vsx_extract_si): On ISA 3.0, allow extract target and temporary
+	registers to be any VSX register.  Move stores to the end of the
+	constraints.
+	(vsx_extract_si_<uns>float_df): New combiner pattern and splitter
+	to optimize extracting a SImode from a V4SImode vector and
+	converting it to a binary floating point type supported by the
+	hardware.  Use the vector converts instead of extracting the
+	element, sign extending it, and then converting it to double.
+	Other floating point types than double first convert to double,
+	then the double is converted to that type.
+	(vsx_extract_si_<uns>float_<mode>): Likewise.
+
 2016-10-31  Andrew Pinski  <apinski@cavium.com>
 
 	* config/aarch64/driver-aarch64.c (host_detect_local_cpu):
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 18f3e86..505c270 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -288,6 +288,16 @@
 			  (V8HI  "v")
 			  (V4SI  "wa")])
 
+;; Mode iterator for binary floating point types other than double, used to
+;; optimize converting an integer element extracted from a vector to that
+;; floating point type
+(define_mode_iterator VSX_EXTRACT_FL [SF
+				      (IF "FLOAT128_2REG_P (IFmode)")
+				      (KF "TARGET_FLOAT128_HW")
+				      (TF "FLOAT128_2REG_P (TFmode)
+					   || (FLOAT128_IEEE_P (TFmode)
+					       && TARGET_FLOAT128_HW)")])
+
 ;; Iterator for the 2 short vector types to do a splat from an integer
 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
 
@@ -1907,6 +1917,7 @@
   [(set_attr "type" "vecdouble")])
 
 ;; Convert from 32-bit to 64-bit types
+;; Provide both vector and scalar targets
 (define_insn "vsx_xvcvsxwdp"
   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
 	(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
@@ -1915,6 +1926,14 @@
   "xvcvsxwdp %x0,%x1"
   [(set_attr "type" "vecdouble")])
 
+(define_insn "vsx_xvcvsxwdp_df"
+  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
+	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+		   UNSPEC_VSX_CVSXWDP))]
+  "TARGET_VSX"
+  "xvcvsxwdp %x0,%x1"
+  [(set_attr "type" "vecdouble")])
+
 (define_insn "vsx_xvcvuxwdp"
   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
 	(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
@@ -1923,6 +1942,14 @@
   "xvcvuxwdp %x0,%x1"
   [(set_attr "type" "vecdouble")])
 
+(define_insn "vsx_xvcvuxwdp_df"
+  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
+	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+		   UNSPEC_VSX_CVUXWDP))]
+  "TARGET_VSX"
+  "xvcvuxwdp %x0,%x1"
+  [(set_attr "type" "vecdouble")])
+
 (define_insn "vsx_xvcvspsxds"
   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
 	(unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
@@ -2574,11 +2601,11 @@
   [(set_attr "type" "vecsimple")])
 
 (define_insn_and_split  "*vsx_extract_si"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,Z,Z,wJwK")
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
 	(vec_select:SI
-	 (match_operand:V4SI 1 "gpc_reg_operand" "v,wJwK,v,v")
-	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
-   (clobber (match_scratch:V4SI 3 "=v,wJwK,v,v"))]
+	 (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
+	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
+   (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
   "&& reload_completed"
@@ -2628,7 +2655,7 @@
 
   DONE;
 }
-  [(set_attr "type" "mftgpr,fpstore,fpstore,vecsimple")
+  [(set_attr "type" "mftgpr,vecperm,fpstore")
    (set_attr "length" "8")])
 
 (define_insn_and_split  "*vsx_extract_<mode>_p8"
@@ -2714,6 +2741,107 @@
   DONE;
 })
 
+;; VSX_EXTRACT optimizations
+;; Optimize double d = (double) vec_extract (vi, <n>)
+;; Get the element into the top position and use XVCVSXWDP/XVCVUXWDP
+(define_insn_and_split "*vsx_extract_si_<uns>float_df"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
+	(any_float:DF
+	 (vec_select:SI
+	  (match_operand:V4SI 1 "gpc_reg_operand" "v")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
+   (clobber (match_scratch:V4SI 3 "=v"))]
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx element = operands[2];
+  rtx v4si_tmp = operands[3];
+  int value;
+
+  if (!VECTOR_ELT_ORDER_BIG)
+    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
+
+  /* If the value is in the correct position, we can avoid doing the VSPLTW
+     instruction.  */
+  value = INTVAL (element);
+  if (value != 0)
+    {
+      if (GET_CODE (v4si_tmp) == SCRATCH)
+	v4si_tmp = gen_reg_rtx (V4SImode);
+      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
+    }
+  else
+    v4si_tmp = src;
+
+  emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
+  DONE;
+})
+
+;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
+;; where <type> is a floating point type other than double that is supported
+;; by the hardware.  First convert the value to double, and then to the
+;; desired type.
+(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
+  [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
+	(any_float:VSX_EXTRACT_FL
+	 (vec_select:SI
+	  (match_operand:V4SI 1 "gpc_reg_operand" "v")
+	  (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
+   (clobber (match_scratch:V4SI 3 "=v"))
+   (clobber (match_scratch:DF 4 "=ws"))]
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx element = operands[2];
+  rtx v4si_tmp = operands[3];
+  rtx df_tmp = operands[4];
+  int value;
+
+  if (!VECTOR_ELT_ORDER_BIG)
+    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
+
+  /* If the value is in the correct position, we can avoid doing the VSPLTW
+     instruction.  */
+  value = INTVAL (element);
+  if (value != 0)
+    {
+      if (GET_CODE (v4si_tmp) == SCRATCH)
+	v4si_tmp = gen_reg_rtx (V4SImode);
+      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
+    }
+  else
+    v4si_tmp = src;
+
+  if (GET_CODE (df_tmp) == SCRATCH)
+    df_tmp = gen_reg_rtx (DFmode);
+
+  emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
+
+  if (<MODE>mode == SFmode)
+    emit_insn (gen_truncdfsf2 (dest, df_tmp));
+  else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
+    emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
+  else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
+	   && TARGET_FLOAT128_HW)
+    emit_insn (gen_extenddftf2_hw (dest, df_tmp));
+  else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
+    emit_insn (gen_extenddfif2 (dest, df_tmp));
+  else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
+    emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
+  else
+    gcc_unreachable ();
+
+  DONE;
+})
+
 ;; Expanders for builtins
 (define_expand "vsx_mergel_<mode>"
   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 36b4d8e..aed8a66 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-10-31  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* gcc.target/powerpc/vsx-extract-4.c: New test.
+	* gcc.target/powerpc/vsx-extract-5.c: Likewise.
+
 2016-10-31  Jerry DeLisle  <jvdelisle@gcc.gnu.org>
 
 	PR fortran/54679
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-extract-4.c b/gcc/testsuite/gcc.target/powerpc/vsx-extract-4.c
new file mode 100644
index 0000000..3b498f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-extract-4.c
@@ -0,0 +1,76 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-O2 -mcpu=power8" } */
+
+/* { dg-final { scan-assembler-times "vspltw"    6 } } */
+/* { dg-final { scan-assembler-times "xvcvsxwdp" 4 } } */
+/* { dg-final { scan-assembler-times "xvcvuxwdp" 4 } } */
+/* { dg-final { scan-assembler-not   "mtvsrd"      } } */
+/* { dg-final { scan-assembler-not   "mtvsrwa"     } } */
+/* { dg-final { scan-assembler-not   "mtvsrwz"     } } */
+/* { dg-final { scan-assembler-not   "mfvsrd"      } } */
+/* { dg-final { scan-assembler-not   "mfvsrwz"     } } */
+
+#include <altivec.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif
+
+TYPE
+foo_0s (vector int v)
+{
+  int i = vec_extract (v, 0);
+  return (TYPE) i;
+}
+
+TYPE
+foo_1s (vector int v)
+{
+  int i = vec_extract (v, 1);
+  return (TYPE) i;
+}
+
+TYPE
+foo_2s (vector int v)
+{
+  int i = vec_extract (v, 2);
+  return (TYPE) i;
+}
+
+TYPE
+foo_3s (vector int v)
+{
+  int i = vec_extract (v, 3);
+  return (TYPE) i;
+}
+
+TYPE
+foo_0u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 0);
+  return (TYPE) u;
+}
+
+TYPE
+foo_1u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 1);
+  return (TYPE) u;
+}
+
+TYPE
+foo_2u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 2);
+  return (TYPE) u;
+}
+
+TYPE
+foo_3u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 3);
+  return (TYPE) u;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-extract-5.c b/gcc/testsuite/gcc.target/powerpc/vsx-extract-5.c
new file mode 100644
index 0000000..1338c6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-extract-5.c
@@ -0,0 +1,77 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-O2 -mcpu=power8" } */
+
+/* { dg-final { scan-assembler-times "vspltw"      6 } } */
+/* { dg-final { scan-assembler-times "xvcvsxwdp"   4 } } */
+/* { dg-final { scan-assembler-times "xvcvuxwdp"   4 } } */
+/* { dg-final { scan-assembler-times "frsp\|xsrsp" 8 } } */
+/* { dg-final { scan-assembler-not   "mtvsrd"        } } */
+/* { dg-final { scan-assembler-not   "mtvsrwa"       } } */
+/* { dg-final { scan-assembler-not   "mtvsrwz"       } } */
+/* { dg-final { scan-assembler-not   "mfvsrd"        } } */
+/* { dg-final { scan-assembler-not   "mfvsrwz"       } } */
+
+#include <altivec.h>
+
+#ifndef TYPE
+#define TYPE float
+#endif
+
+TYPE
+foo_0s (vector int v)
+{
+  int i = vec_extract (v, 0);
+  return (TYPE) i;
+}
+
+TYPE
+foo_1s (vector int v)
+{
+  int i = vec_extract (v, 1);
+  return (TYPE) i;
+}
+
+TYPE
+foo_2s (vector int v)
+{
+  int i = vec_extract (v, 2);
+  return (TYPE) i;
+}
+
+TYPE
+foo_3s (vector int v)
+{
+  int i = vec_extract (v, 3);
+  return (TYPE) i;
+}
+
+TYPE
+foo_0u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 0);
+  return (TYPE) u;
+}
+
+TYPE
+foo_1u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 1);
+  return (TYPE) u;
+}
+
+TYPE
+foo_2u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 2);
+  return (TYPE) u;
+}
+
+TYPE
+foo_3u (vector unsigned int v)
+{
+  unsigned int u = vec_extract (v, 3);
+  return (TYPE) u;
+}
author	Michael Meissner <meissner@linux.vnet.ibm.com>	2016-11-01 00:41:30 +0000
committer	Michael Meissner <meissner@gcc.gnu.org>	2016-11-01 00:41:30 +0000
commit	156b5ccae7b1542a23d4d42b0a6cf9672c7b6e7d (patch)
tree	0b64ac057b25bef1487ad672b70544c9e0daf4ab /gcc
parent	fb4c92aabcbb34c33584d4c60df8d9baed3dca7a (diff)
download	gcc-156b5ccae7b1542a23d4d42b0a6cf9672c7b6e7d.zip gcc-156b5ccae7b1542a23d4d42b0a6cf9672c7b6e7d.tar.gz gcc-156b5ccae7b1542a23d4d42b0a6cf9672c7b6e7d.tar.bz2