aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog24
-rw-r--r--gcc/config/rs6000/rs6000.c20
-rw-r--r--gcc/config/rs6000/vector.md49
-rw-r--r--gcc/config/rs6000/vsx.md196
-rw-r--r--gcc/testsuite/ChangeLog8
-rw-r--r--gcc/testsuite/gcc.target/powerpc/pr48258-1.c57
-rw-r--r--gcc/testsuite/gcc.target/powerpc/pr48258-2.c58
7 files changed, 405 insertions, 7 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0749c64..99c029d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,27 @@
+2011-04-26 Michael Meissner <meissner@linux.vnet.ibm.com>
+
+ PR target/48258
+ * config/rs6000/vector.md (UNSPEC_REDUC): New unspec for vector
+ reduction.
+ (VEC_reduc): New code iterator and splitters for vector reduction.
+ (VEC_reduc_name): Ditto.
+ (VEC_reduc_rtx): Ditto.
+ (reduc_<VEC_reduc_name>_v2df): Vector reduction expanders for VSX.
+ (reduc_<VEC_reduc_name>_v4sf): Ditto.
+
+ * config/rs6000/rs6000.c (rs6000_expand_vector_extract): Add
+ support for extracting SF on VSX.
+
+ * config/rs6000/vsx.md (vsx_xscvspdp_scalar2): New insn for
+ generating xscvspdp.
+ (vsx_extract_v4sf): New insn to extract SF from V4SF vector.
+ (vsx_reduc_<VEC_reduc_name>_v2df): New insns and splitters for
+ double add, minimum, maximum vector reduction.
+ (vsx_reduc_<VEC_reduc_name>_v4sf): Ditto.
+ (vsx_reduc_<VEC_reduc_name>_v2df2_scalar): New combiner insn to
+ optimize double vector reduction.
+ (vsx_reduc_<VEC_reduc_name>_v4sf_scalar): Ditto.
+
2011-04-26 Joseph Myers <joseph@codesourcery.com>
* config/fr30/fr30.h (inhibit_libc): Don't define.
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6113a75..4125963 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5463,12 +5463,22 @@ rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
enum machine_mode inner_mode = GET_MODE_INNER (mode);
rtx mem;
- if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
+ if (VECTOR_MEM_VSX_P (mode))
{
- rtx (*extract_func) (rtx, rtx, rtx)
- = ((mode == V2DFmode) ? gen_vsx_extract_v2df : gen_vsx_extract_v2di);
- emit_insn (extract_func (target, vec, GEN_INT (elt)));
- return;
+ switch (mode)
+ {
+ default:
+ break;
+ case V2DFmode:
+ emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
+ return;
+ case V2DImode:
+ emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
+ return;
+ case V4SFmode:
+ emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
+ return;
+ }
}
/* Allocate mode-sized buffer. */
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index a3a8e12..c011250 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -74,7 +74,19 @@
(V2DF "V2DI")])
;; constants for unspec
-(define_c_enum "unspec" [UNSPEC_PREDICATE])
+(define_c_enum "unspec" [UNSPEC_PREDICATE
+ UNSPEC_REDUC])
+
+;; Vector reduction code iterators
+(define_code_iterator VEC_reduc [plus smin smax])
+
+(define_code_attr VEC_reduc_name [(plus "splus")
+ (smin "smin")
+ (smax "smax")])
+
+(define_code_attr VEC_reduc_rtx [(plus "add")
+ (smin "smin")
+ (smax "smax")])
;; Vector move instructions.
@@ -991,6 +1003,41 @@
"TARGET_ALTIVEC"
"")
+;; Vector reduction expanders for VSX
+
+(define_expand "reduc_<VEC_reduc_name>_v2df"
+ [(parallel [(set (match_operand:V2DF 0 "vfloat_operand" "")
+ (VEC_reduc:V2DF
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "vfloat_operand" "")
+ (parallel [(const_int 1)]))
+ (vec_select:DF
+ (match_dup 1)
+ (parallel [(const_int 0)])))
+ (match_dup 1)))
+ (clobber (match_scratch:V2DF 2 ""))])]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "")
+
+; The (VEC_reduc:V4SF
+; (op1)
+; (unspec:V4SF [(const_int 0)] UNSPEC_REDUC))
+;
+; is to allow us to use a code iterator, but not completely list all of the
+; vector rotates, etc. to prevent canonicalization
+
+(define_expand "reduc_<VEC_reduc_name>_v4sf"
+ [(parallel [(set (match_operand:V4SF 0 "vfloat_operand" "")
+ (VEC_reduc:V4SF
+ (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
+ (match_operand:V4SF 1 "vfloat_operand" "")))
+ (clobber (match_scratch:V4SF 2 ""))
+ (clobber (match_scratch:V4SF 3 ""))])]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+ "")
+
+
;;; Expanders for vector insn patterns shared between the SPE and TARGET_PAIRED systems.
(define_expand "absv2sf2"
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index fc331dc..d4f5296 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -829,6 +829,15 @@
"xscvdpsp %x0,%x1"
[(set_attr "type" "fp")])
+;; Same as vsx_xscvspdp, but use SF as the type
+(define_insn "vsx_xscvspdp_scalar2"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=f")
+ (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_CVSPDP))]
+ "VECTOR_UNIT_VSX_P (DFmode)"
+ "xscvspdp %x0,%x1"
+ [(set_attr "type" "fp")])
+
;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.
@@ -1039,6 +1048,43 @@
[(set_attr "type" "fpload")
(set_attr "length" "4")])
+;; Extract a SF element from V4SF
+(define_insn_and_split "vsx_extract_v4sf"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
+ (vec_select:SF
+ (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
+ (parallel [(match_operand:QI 2 "u5bit_cint_operand" "O,i")])))
+ (clobber (match_scratch:V4SF 3 "=X,0"))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+ "@
+ xscvspdp %x0,%x1
+ #"
+ ""
+ [(const_int 0)]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+ rtx tmp;
+ HOST_WIDE_INT ele = INTVAL (op2);
+
+ if (ele == 0)
+ tmp = op1;
+ else
+ {
+ if (GET_CODE (op3) == SCRATCH)
+ op3 = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, op2));
+ tmp = op3;
+ }
+ emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
+ DONE;
+}"
+ [(set_attr "length" "4,8")
+ (set_attr "type" "fp")])
+
;; General double word oriented permute, allow the other vector types for
;; optimizing the permute instruction.
(define_insn "vsx_xxpermdi_<mode>"
@@ -1150,3 +1196,153 @@
"VECTOR_MEM_VSX_P (<MODE>mode)"
"xxsldwi %x0,%x1,%x2,%3"
[(set_attr "type" "vecperm")])
+
+
+;; Vector reduction insns and splitters
+
+(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df"
+ [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa")
+ (VEC_reduc:V2DF
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
+ (parallel [(const_int 1)]))
+ (vec_select:DF
+ (match_dup 1)
+ (parallel [(const_int 0)])))
+ (match_dup 1)))
+ (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "#"
+ ""
+ [(const_int 0)]
+ "
+{
+ rtx tmp = (GET_CODE (operands[2]) == SCRATCH)
+ ? gen_reg_rtx (V2DFmode)
+ : operands[2];
+ emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx));
+ emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1]));
+ DONE;
+}"
+ [(set_attr "length" "8")
+ (set_attr "type" "veccomplex")])
+
+(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf"
+ [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa")
+ (VEC_reduc:V4SF
+ (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
+ (match_operand:V4SF 1 "vfloat_operand" "wf,wa")))
+ (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
+ (clobber (match_scratch:V4SF 3 "=&wf,&wa"))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+ "#"
+ ""
+ [(const_int 0)]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx tmp2, tmp3, tmp4;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp2 = gen_reg_rtx (V4SFmode);
+ tmp3 = gen_reg_rtx (V4SFmode);
+ tmp4 = gen_reg_rtx (V4SFmode);
+ }
+ else
+ {
+ tmp2 = operands[2];
+ tmp3 = operands[3];
+ tmp4 = tmp2;
+ }
+
+ emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
+ emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
+ emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
+ emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3));
+ DONE;
+}"
+ [(set_attr "length" "16")
+ (set_attr "type" "veccomplex")])
+
+;; Combiner patterns with the vector reduction patterns that knows we can get
+;; to the top element of the V2DF array without doing an extract.
+
+(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar"
+ [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?wa,ws,?wa")
+ (vec_select:DF
+ (VEC_reduc:V2DF
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa")
+ (parallel [(const_int 1)]))
+ (vec_select:DF
+ (match_dup 1)
+ (parallel [(const_int 0)])))
+ (match_dup 1))
+ (parallel [(const_int 1)])))
+ (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "#"
+ ""
+ [(const_int 0)]
+ "
+{
+ rtx hi = gen_highpart (DFmode, operands[1]);
+ rtx lo = (GET_CODE (operands[2]) == SCRATCH)
+ ? gen_reg_rtx (DFmode)
+ : operands[2];
+
+ emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx));
+ emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo));
+ DONE;
+}"
+ [(set_attr "length" "8")
+ (set_attr "type" "veccomplex")])
+
+(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar"
+ [(set (match_operand:SF 0 "vfloat_operand" "=f,?f")
+ (vec_select:SF
+ (VEC_reduc:V4SF
+ (unspec:V4SF [(const_int 0)] UNSPEC_REDUC)
+ (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))
+ (parallel [(const_int 3)])))
+ (clobber (match_scratch:V4SF 2 "=&wf,&wa"))
+ (clobber (match_scratch:V4SF 3 "=&wf,&wa"))
+ (clobber (match_scratch:V4SF 4 "=0,0"))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+ "#"
+ ""
+ [(const_int 0)]
+ "
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx tmp2, tmp3, tmp4, tmp5;
+
+ if (can_create_pseudo_p ())
+ {
+ tmp2 = gen_reg_rtx (V4SFmode);
+ tmp3 = gen_reg_rtx (V4SFmode);
+ tmp4 = gen_reg_rtx (V4SFmode);
+ tmp5 = gen_reg_rtx (V4SFmode);
+ }
+ else
+ {
+ tmp2 = operands[2];
+ tmp3 = operands[3];
+ tmp4 = tmp2;
+ tmp5 = operands[4];
+ }
+
+ emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx));
+ emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1));
+ emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3)));
+ emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3));
+ emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5));
+ DONE;
+}"
+ [(set_attr "length" "20")
+ (set_attr "type" "veccomplex")])
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 79cb509..f6533ca 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,11 @@
+2011-03-23 Michael Meissner <meissner@linux.vnet.ibm.com>
+
+ PR target/48258
+ * gcc.target/powerpc/pr48258-1.c: New file.
+ * gcc.target/powerpc/pr48258-2.c: Ditto.
+
2011-04-26 Xinliang David Li <davidxl@google.com>
-
+
* gcc.dg/uninit-suppress.c: New test.
* gcc.dg/uninit-suppress.c: New test.
diff --git a/gcc/testsuite/gcc.target/powerpc/pr48258-1.c b/gcc/testsuite/gcc.target/powerpc/pr48258-1.c
new file mode 100644
index 0000000..4f37815
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr48258-1.c
@@ -0,0 +1,57 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O3 -mcpu=power7 -mabi=altivec -ffast-math -fno-unroll-loops" } */
+/* { dg-final { scan-assembler-times "xvaddsp" 3 } } */
+/* { dg-final { scan-assembler-times "xvminsp" 3 } } */
+/* { dg-final { scan-assembler-times "xvmaxsp" 3 } } */
+/* { dg-final { scan-assembler-times "xxsldwi" 6 } } */
+/* { dg-final { scan-assembler-times "xscvspdp" 3 } } */
+/* { dg-final { scan-assembler-not "stvewx" } } */
+/* { dg-final { scan-assembler-not "stvx" } } */
+/* { dg-final { scan-assembler-not "stxvd2x" } } */
+/* { dg-final { scan-assembler-not "stxvw4x" } } */
+
+#include <stddef.h>
+
+#ifndef SIZE
+#define SIZE 1024
+#endif
+
+float values[SIZE] __attribute__((__aligned__(32)));
+
+float
+vector_sum (void)
+{
+ size_t i;
+ float sum = 0.0f;
+
+ for (i = 0; i < SIZE; i++)
+ sum += values[i];
+
+ return sum;
+}
+
+float
+vector_min (void)
+{
+ size_t i;
+ float min = values[0];
+
+ for (i = 0; i < SIZE; i++)
+ min = __builtin_fminf (min, values[i]);
+
+ return min;
+}
+
+float
+vector_max (void)
+{
+ size_t i;
+ float max = values[0];
+
+ for (i = 0; i < SIZE; i++)
+ max = __builtin_fmaxf (max, values[i]);
+
+ return max;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr48258-2.c b/gcc/testsuite/gcc.target/powerpc/pr48258-2.c
new file mode 100644
index 0000000..443fb62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr48258-2.c
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O3 -mcpu=power7 -mabi=altivec -ffast-math -fno-unroll-loops" } */
+/* { dg-final { scan-assembler-times "xvadddp" 1 } } */
+/* { dg-final { scan-assembler-times "xvmindp" 1 } } */
+/* { dg-final { scan-assembler-times "xvmaxdp" 1 } } */
+/* { dg-final { scan-assembler-times "xsadddp" 1 } } */
+/* { dg-final { scan-assembler-times "xsmindp" 1 } } */
+/* { dg-final { scan-assembler-times "xsmaxdp" 1 } } */
+/* { dg-final { scan-assembler-not "xxsldwi" } } */
+/* { dg-final { scan-assembler-not "stvx" } } */
+/* { dg-final { scan-assembler-not "stxvd2x" } } */
+/* { dg-final { scan-assembler-not "stxvw4x" } } */
+
+#include <stddef.h>
+
+#ifndef SIZE
+#define SIZE 1024
+#endif
+
+double values[SIZE] __attribute__((__aligned__(32)));
+
+double
+vector_sum (void)
+{
+ size_t i;
+ double sum = 0.0;
+
+ for (i = 0; i < SIZE; i++)
+ sum += values[i];
+
+ return sum;
+}
+
+double
+vector_min (void)
+{
+ size_t i;
+ double min = values[0];
+
+ for (i = 0; i < SIZE; i++)
+ min = __builtin_fmin (min, values[i]);
+
+ return min;
+}
+
+double
+vector_max (void)
+{
+ size_t i;
+ double max = values[0];
+
+ for (i = 0; i < SIZE; i++)
+ max = __builtin_fmax (max, values[i]);
+
+ return max;
+}