aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog37
-rw-r--r--gcc/config/rs6000/altivec.md195
-rw-r--r--gcc/expr.c7
-rw-r--r--gcc/genopinit.c5
-rw-r--r--gcc/optabs.c68
-rw-r--r--gcc/optabs.h14
-rw-r--r--gcc/testsuite/ChangeLog18
-rw-r--r--gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-5.c53
-rw-r--r--gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-7.c53
-rw-r--r--gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c49
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-reduc-1.c6
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-reduc-1char.c51
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-reduc-1short.c51
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-reduc-2.c12
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c51
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c51
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-reduc-3.c12
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-reduc-6.c51
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect.exp20
-rw-r--r--gcc/testsuite/lib/target-supports.exp17
-rw-r--r--gcc/tree-inline.c2
-rw-r--r--gcc/tree-pretty-print.c10
-rw-r--r--gcc/tree-vect-generic.c1
-rw-r--r--gcc/tree-vect-transform.c257
-rw-r--r--gcc/tree.def6
25 files changed, 882 insertions, 215 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0113201..0cc4cf9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,40 @@
+2005-06-21 Dorit Nuzman <dorit@il.ibm.com>
+
+ * genopinit.c (vec_shl_optab, vec_shr_optab): Initialize new optabs.
+ (reduc_plus_optab): Removed. Replaced with...
+ (reduc_splus_optab, reduc_uplus_optab): Initialize new optabs.
+ * optabs.c (optab_for_tree_code): Return reduc_splus_optab or
+ reduc_uplus_optab instead of reduc_plus_optab.
+ (expand_vec_shift_expr): New function.
+ (init_optabs): Initialize new optabs. Remove initialization of
+ reduc_plus_optab.
+ (optab_for_tree_code): Return vec_shl_optab/vec_shr_optab
+ for VEC_LSHIFT_EXPR/VEC_RSHIFT_EXPR.
+ * optabs.h (OTI_reduc_plus): Removed. Replaced with...
+ (OTI_reduc_splus, OTI_reduc_uplus): New.
+ (reduc_plus_optab): Removed. Replaced with...
+ (reduc_splus_optab, reduc_uplus_optab): New optabs.
+ (vec_shl_optab, vec_shr_optab): New optabs.
+ (expand_vec_shift_expr): New function declaration.
+
+ * tree.def (VEC_LSHIFT_EXPR, VEC_RSHIFT_EXPR): New tree-codes.
+ * tree-inline.c (estimate_num_insns_1): Handle new tree-codes.
+ * expr.c (expand_expr_real_1): Handle new tree-codes.
+ * tree-pretty-print.c (dump_generic_node, op_symbol, op_prio): Likewise.
+ * tree-vect-generic.c (expand_vector_operations_1): Add assert.
+
+ * tree-vect-transform.c (vect_create_epilog_for_reduction): Add two
+ alternatives for generating reduction epilog code.
+ (vectorizable_reduction): Don't fail if direct reduction support is
+ not available.
+ (vectorizable_target_reduction_pattern): Likewise.
+
+ * config/rs6000/altivec.md (reduc_smax_v4si, reduc_smax_v4sf,
+ reduc_umax_v4si, reduc_smin_v4si, reduc_smin_v4sf, reduc_umin_v4si,
+ reduc_plus_v4si, reduc_plus_v4sf): Removed.
+ (vec_shl_<mode>, vec_shr_<mode>, altivec_vsumsws_nomode,
+ reduc_splus_<mode>, reduc_uplus_v16qi): New.
+
2005-06-20 Daniel Berlin <dberlin@dberlin.org>
* c-typeck.c (build_function_call): Set fundecl = function again.
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 3b20447..7bfd5d9 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1825,157 +1825,100 @@
operands[3] = gen_reg_rtx (GET_MODE (operands[0]));
})
-;; Reduction
-
-(define_expand "reduc_smax_v4si"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
+;; Vector shift left in bits. Currently supported only for shift
+;; amounts that can be expressed as byte shifts (divisible by 8).
+;; General shift amounts can be supported using vslo + vsl. We're
+;; not expecting to see these yet (the vectorizer currently
+;; generates only shifts divisible by byte_size).
+(define_expand "vec_shl_<mode>"
+ [(set (match_operand:V 0 "register_operand" "=v")
+ (unspec:V [(match_operand:V 1 "register_operand" "v")
+ (match_operand:QI 2 "reg_or_short_operand" "")] 219 ))]
"TARGET_ALTIVEC"
"
-{
- rtx vtmp1 = gen_reg_rtx (V4SImode);
- rtx vtmp2 = gen_reg_rtx (V4SImode);
- rtx vtmp3 = gen_reg_rtx (V4SImode);
-
- emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_smaxv4si3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_smaxv4si3 (operands[0], vtmp2, vtmp3));
+{
+ rtx bitshift = operands[2];
+ rtx byteshift = gen_reg_rtx (QImode);
+ HOST_WIDE_INT bitshift_val;
+ HOST_WIDE_INT byteshift_val;
+
+ if (! CONSTANT_P (bitshift))
+ FAIL;
+ bitshift_val = INTVAL (bitshift);
+ if (bitshift_val & 0x7)
+ FAIL;
+ byteshift_val = bitshift_val >> 3;
+ byteshift = gen_rtx_CONST_INT (QImode, byteshift_val);
+ emit_insn (gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
+ byteshift));
DONE;
}")
-(define_expand "reduc_smax_v4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=v")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))]
+;; Vector shift right in bits. Currently supported only for shift
+;; amounts that can be expressed as byte shifts (divisible by 8).
+;; General shift amounts can be supported using vsro + vsr. We're
+;; not expecting to see these yet (the vectorizer currently
+;; generates only shifts divisible by byte_size).
+(define_expand "vec_shr_<mode>"
+ [(set (match_operand:V 0 "register_operand" "=v")
+ (unspec:V [(match_operand:V 1 "register_operand" "v")
+ (match_operand:QI 2 "reg_or_short_operand" "")] 219 ))]
"TARGET_ALTIVEC"
"
-{
- rtx vtmp1 = gen_reg_rtx (V4SFmode);
- rtx vtmp2 = gen_reg_rtx (V4SFmode);
- rtx vtmp3 = gen_reg_rtx (V4SFmode);
-
- emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_smaxv4sf3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_smaxv4sf3 (operands[0], vtmp2, vtmp3));
+{
+ rtx bitshift = operands[2];
+ rtx byteshift = gen_reg_rtx (QImode);
+ HOST_WIDE_INT bitshift_val;
+ HOST_WIDE_INT byteshift_val;
+
+ if (! CONSTANT_P (bitshift))
+ FAIL;
+ bitshift_val = INTVAL (bitshift);
+ if (bitshift_val & 0x7)
+ FAIL;
+ byteshift_val = 16 - (bitshift_val >> 3);
+ byteshift = gen_rtx_CONST_INT (QImode, byteshift_val);
+ emit_insn (gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
+ byteshift));
DONE;
}")
-(define_expand "reduc_umax_v4si"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
+(define_insn "altivec_vsumsws_nomode"
+ [(set (match_operand 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")] 135))
+ (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
"TARGET_ALTIVEC"
- "
-{
- rtx vtmp1 = gen_reg_rtx (V4SImode);
- rtx vtmp2 = gen_reg_rtx (V4SImode);
- rtx vtmp3 = gen_reg_rtx (V4SImode);
-
- emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_umaxv4si3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_umaxv4si3 (operands[0], vtmp2, vtmp3));
- DONE;
-}")
+ "vsumsws %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
-(define_expand "reduc_smin_v4si"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
+(define_expand "reduc_splus_<mode>"
+ [(set (match_operand:VIshort 0 "register_operand" "=v")
+ (unspec:VIshort [(match_operand:VIshort 1 "register_operand" "v")] 217))]
"TARGET_ALTIVEC"
"
{
+ rtx vzero = gen_reg_rtx (V4SImode);
rtx vtmp1 = gen_reg_rtx (V4SImode);
- rtx vtmp2 = gen_reg_rtx (V4SImode);
- rtx vtmp3 = gen_reg_rtx (V4SImode);
-
- emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_sminv4si3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_sminv4si3 (operands[0], vtmp2, vtmp3));
- DONE;
-}")
-(define_expand "reduc_smin_v4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=v")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))]
- "TARGET_ALTIVEC"
- "
-{
- rtx vtmp1 = gen_reg_rtx (V4SFmode);
- rtx vtmp2 = gen_reg_rtx (V4SFmode);
- rtx vtmp3 = gen_reg_rtx (V4SFmode);
-
- emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_sminv4sf3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_sminv4sf3 (operands[0], vtmp2, vtmp3));
+ emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
+ emit_insn (gen_altivec_vsum4s<VI_char>s (vtmp1, operands[1], vzero));
+ emit_insn (gen_altivec_vsumsws_nomode (operands[0], vtmp1, vzero));
DONE;
}")
-(define_expand "reduc_umin_v4si"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
+(define_expand "reduc_uplus_v16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] 217))]
"TARGET_ALTIVEC"
"
{
+ rtx vzero = gen_reg_rtx (V4SImode);
rtx vtmp1 = gen_reg_rtx (V4SImode);
- rtx vtmp2 = gen_reg_rtx (V4SImode);
- rtx vtmp3 = gen_reg_rtx (V4SImode);
-
- emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_uminv4si3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_uminv4si3 (operands[0], vtmp2, vtmp3));
- DONE;
-}")
-(define_expand "reduc_plus_v4si"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
- "TARGET_ALTIVEC"
- "
-{
- rtx vtmp1 = gen_reg_rtx (V4SImode);
- rtx vtmp2 = gen_reg_rtx (V4SImode);
- rtx vtmp3 = gen_reg_rtx (V4SImode);
-
- emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_addv4si3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_addv4si3 (operands[0], vtmp2, vtmp3));
- DONE;
-}")
-
-(define_expand "reduc_plus_v4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=v")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))]
- "TARGET_ALTIVEC"
- "
-{
- rtx vtmp1 = gen_reg_rtx (V4SFmode);
- rtx vtmp2 = gen_reg_rtx (V4SFmode);
- rtx vtmp3 = gen_reg_rtx (V4SFmode);
-
- emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1],
- gen_rtx_CONST_INT (SImode, 8)));
- emit_insn (gen_addv4sf3 (vtmp2, operands[1], vtmp1));
- emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2,
- gen_rtx_CONST_INT (SImode, 4)));
- emit_insn (gen_addv4sf3 (operands[0], vtmp2, vtmp3));
+ emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
+ emit_insn (gen_altivec_vsum4ubs (vtmp1, operands[1], vzero));
+ emit_insn (gen_altivec_vsumsws_nomode (operands[0], vtmp1, vzero));
DONE;
}")
diff --git a/gcc/expr.c b/gcc/expr.c
index 0f9b1d2..573ec5e 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -8367,6 +8367,13 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
return temp;
}
+ case VEC_LSHIFT_EXPR:
+ case VEC_RSHIFT_EXPR:
+ {
+ target = expand_vec_shift_expr (exp, target);
+ return target;
+ }
+
default:
return lang_hooks.expand_expr (exp, original_target, tmode,
modifier, alt_rtl);
diff --git a/gcc/genopinit.c b/gcc/genopinit.c
index eea084d..19a7f7c 100644
--- a/gcc/genopinit.c
+++ b/gcc/genopinit.c
@@ -196,6 +196,8 @@ static const char * const optabs[] =
"vec_set_optab->handlers[$A].insn_code = CODE_FOR_$(vec_set$a$)",
"vec_extract_optab->handlers[$A].insn_code = CODE_FOR_$(vec_extract$a$)",
"vec_init_optab->handlers[$A].insn_code = CODE_FOR_$(vec_init$a$)",
+ "vec_shl_optab->handlers[$A].insn_code = CODE_FOR_$(vec_shl_$a$)",
+ "vec_shr_optab->handlers[$A].insn_code = CODE_FOR_$(vec_shr_$a$)",
"vec_realign_load_optab->handlers[$A].insn_code = CODE_FOR_$(vec_realign_load_$a$)",
"vcond_gen_code[$A] = CODE_FOR_$(vcond$a$)",
"vcondu_gen_code[$A] = CODE_FOR_$(vcondu$a$)",
@@ -203,7 +205,8 @@ static const char * const optabs[] =
"reduc_umax_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_umax_$a$)",
"reduc_smin_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_smin_$a$)",
"reduc_umin_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_umin_$a$)",
- "reduc_plus_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_plus_$a$)"
+ "reduc_splus_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_splus_$a$)" ,
+ "reduc_uplus_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_uplus_$a$)"
};
static void gen_insn (rtx);
diff --git a/gcc/optabs.c b/gcc/optabs.c
index cd4f2cb..2202727 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -301,7 +301,13 @@ optab_for_tree_code (enum tree_code code, tree type)
return TYPE_UNSIGNED (type) ? reduc_umin_optab : reduc_smin_optab;
case REDUC_PLUS_EXPR:
- return reduc_plus_optab;
+ return TYPE_UNSIGNED (type) ? reduc_uplus_optab : reduc_splus_optab;
+
+ case VEC_LSHIFT_EXPR:
+ return vec_shl_optab;
+
+ case VEC_RSHIFT_EXPR:
+ return vec_shr_optab;
default:
break;
@@ -443,6 +449,61 @@ force_expand_binop (enum machine_mode mode, optab binoptab,
return true;
}
+/* Generate insns for VEC_LSHIFT_EXPR, VEC_RSHIFT_EXPR. */
+
+rtx
+expand_vec_shift_expr (tree vec_shift_expr, rtx target)
+{
+ enum insn_code icode;
+ rtx rtx_op1, rtx_op2;
+ enum machine_mode mode1;
+ enum machine_mode mode2;
+ enum machine_mode mode = TYPE_MODE (TREE_TYPE (vec_shift_expr));
+ tree vec_oprnd = TREE_OPERAND (vec_shift_expr, 0);
+ tree shift_oprnd = TREE_OPERAND (vec_shift_expr, 1);
+ optab shift_optab;
+ rtx pat;
+
+ switch (TREE_CODE (vec_shift_expr))
+ {
+ case VEC_RSHIFT_EXPR:
+ shift_optab = vec_shr_optab;
+ break;
+ case VEC_LSHIFT_EXPR:
+ shift_optab = vec_shl_optab;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ icode = (int) shift_optab->handlers[(int) mode].insn_code;
+ gcc_assert (icode != CODE_FOR_nothing);
+
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+
+ rtx_op1 = expand_expr (vec_oprnd, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ if (!(*insn_data[icode].operand[1].predicate) (rtx_op1, mode1)
+ && mode1 != VOIDmode)
+ rtx_op1 = force_reg (mode1, rtx_op1);
+
+ rtx_op2 = expand_expr (shift_oprnd, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ if (!(*insn_data[icode].operand[2].predicate) (rtx_op2, mode2)
+ && mode2 != VOIDmode)
+ rtx_op2 = force_reg (mode2, rtx_op2);
+
+ if (!target
+ || ! (*insn_data[icode].operand[0].predicate) (target, mode))
+ target = gen_reg_rtx (mode);
+
+ /* Emit instruction */
+ pat = GEN_FCN (icode) (target, rtx_op1, rtx_op2);
+ gcc_assert (pat);
+ emit_insn (pat);
+
+ return target;
+}
+
/* This subroutine of expand_doubleword_shift handles the cases in which
the effective shift value is >= BITS_PER_WORD. The arguments and return
value are the same as for the parent routine, except that SUPERWORD_OP1
@@ -5074,11 +5135,14 @@ init_optabs (void)
reduc_umax_optab = init_optab (UNKNOWN);
reduc_smin_optab = init_optab (UNKNOWN);
reduc_umin_optab = init_optab (UNKNOWN);
- reduc_plus_optab = init_optab (UNKNOWN);
+ reduc_splus_optab = init_optab (UNKNOWN);
+ reduc_uplus_optab = init_optab (UNKNOWN);
vec_extract_optab = init_optab (UNKNOWN);
vec_set_optab = init_optab (UNKNOWN);
vec_init_optab = init_optab (UNKNOWN);
+ vec_shl_optab = init_optab (UNKNOWN);
+ vec_shr_optab = init_optab (UNKNOWN);
vec_realign_load_optab = init_optab (UNKNOWN);
movmisalign_optab = init_optab (UNKNOWN);
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 2495fed..91afce3 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -236,7 +236,8 @@ enum optab_index
OTI_reduc_umax,
OTI_reduc_smin,
OTI_reduc_umin,
- OTI_reduc_plus,
+ OTI_reduc_splus,
+ OTI_reduc_uplus,
/* Set specified field of vector operand. */
OTI_vec_set,
@@ -244,6 +245,9 @@ enum optab_index
OTI_vec_extract,
/* Initialize vector operand. */
OTI_vec_init,
+ /* Whole vector shift. The shift amount is in bits. */
+ OTI_vec_shl,
+ OTI_vec_shr,
/* Extract specified elements from vectors, for vector load. */
OTI_vec_realign_load,
@@ -358,11 +362,14 @@ extern GTY(()) optab optab_table[OTI_MAX];
#define reduc_umax_optab (optab_table[OTI_reduc_umax])
#define reduc_smin_optab (optab_table[OTI_reduc_smin])
#define reduc_umin_optab (optab_table[OTI_reduc_umin])
-#define reduc_plus_optab (optab_table[OTI_reduc_plus])
+#define reduc_splus_optab (optab_table[OTI_reduc_splus])
+#define reduc_uplus_optab (optab_table[OTI_reduc_uplus])
#define vec_set_optab (optab_table[OTI_vec_set])
#define vec_extract_optab (optab_table[OTI_vec_extract])
#define vec_init_optab (optab_table[OTI_vec_init])
+#define vec_shl_optab (optab_table[OTI_vec_shl])
+#define vec_shr_optab (optab_table[OTI_vec_shr])
#define vec_realign_load_optab (optab_table[OTI_vec_realign_load])
#define powi_optab (optab_table[OTI_powi])
@@ -575,4 +582,7 @@ bool expand_vec_cond_expr_p (tree, enum machine_mode);
/* Generate code for VEC_COND_EXPR. */
extern rtx expand_vec_cond_expr (tree, rtx);
+/* Generate code for VEC_LSHIFT_EXPR and VEC_RSHIFT_EXPR. */
+extern rtx expand_vec_shift_expr (tree, rtx);
+
#endif /* GCC_OPTABS_H */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e452f4cd..a526fb1 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,21 @@
+2005-06-21 Dorit Nuzman <dorit@il.ibm.com>
+
+ * lib/target-supports.exp (check_effective_target_vect_reduction):
+ Remove.
+ * gcc.dg/vect/vect.exp: Run tests with additional flags separately.
+ * gcc.dg/vect/vect-reduc-1.c: Vectorizable on all relevant platforms -
+ remove vect_reduction target keyword. Also avoid two returns in main.
+ * gcc.dg/vect/vect-reduc-3.c: Likewise.
+ * gcc.dg/vect/vect-reduc-2.c: Likewise. Also initialize diff to 0.
+ * gcc.dg/vect/vect-reduc-1short.c: New test.
+ * gcc.dg/vect/vect-reduc-1char.c: New test.
+ * gcc.dg/vect/vect-reduc-2short.c: New test.
+ * gcc.dg/vect/vect-reduc-2char.c: New test.
+ * gcc.dg/vect/vect-reduc-6.c: New test.
+ * gcc.dg/vect/trapv-vect-reduc-4.c: New test.
+ * gcc.dg/vect/fast-math-vect-reduc-5.c: New test.
+ * gcc.dg/vect/fast-math-vect-reduc-7.c: New test.
+
2005-06-21 Tobias Schl"uter <tobias.schlueter@physik.uni-muenchen.de>
Paul Thomas <pault@gcc.gnu.org>
diff --git a/gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-5.c b/gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-5.c
new file mode 100644
index 0000000..dd84f5c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-5.c
@@ -0,0 +1,53 @@
+/* { dg-require-effective-target vect_float } */
+
+/* need -funsafe-math-optimizations to vectorize the summation.
+ also need -ffinite-math-only to create the min/max expr. */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+#define DIFF 242
+
+int main1 (float x, float max_result)
+{
+ int i;
+ float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+ float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ float diff = 2;
+ float max = x;
+ float min = 10;
+
+ for (i = 0; i < N; i++) {
+ diff += (b[i] - c[i]);
+ }
+
+ for (i = 0; i < N; i++) {
+ max = max < c[i] ? c[i] : max;
+ }
+
+ for (i = 0; i < N; i++) {
+ min = min > c[i] ? c[i] : min;
+ }
+
+ /* check results: */
+ if (diff != DIFF)
+ abort ();
+ if (max != max_result)
+ abort ();
+ if (min != 0)
+ abort ();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (100, 100);
+ main1 (0, 15);
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-7.c b/gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-7.c
new file mode 100644
index 0000000..797b1a7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-7.c
@@ -0,0 +1,53 @@
+/* { dg-require-effective-target vect_double } */
+
+/* need -funsafe-math-optimizations to vectorize the summation.
+ also need -ffinite-math-only to create the min/max expr. */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+#define DIFF 242
+
+int main1 (double x, double max_result)
+{
+ int i;
+ double b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+ double c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ double diff = 2;
+ double max = x;
+ double min = 10;
+
+ for (i = 0; i < N; i++) {
+ diff += (b[i] - c[i]);
+ }
+
+ for (i = 0; i < N; i++) {
+ max = max < c[i] ? c[i] : max;
+ }
+
+ for (i = 0; i < N; i++) {
+ min = min > c[i] ? c[i] : min;
+ }
+
+ /* check results: */
+ if (diff != DIFF)
+ abort ();
+ if (max != max_result)
+ abort ();
+ if (min != 0)
+ abort ();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (100, 100);
+ main1 (0, 15);
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
new file mode 100644
index 0000000..2129717
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
@@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-do compile } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+#define DIFF 242
+
+int main1 (int x, int max_result)
+{
+ int i;
+ int b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+ int c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ int diff = 2;
+ int max = x;
+ int min = 10;
+
+ for (i = 0; i < N; i++) {
+ diff += (b[i] - c[i]);
+ }
+
+ for (i = 0; i < N; i++) {
+ max = max < c[i] ? c[i] : max;
+ }
+
+ for (i = 0; i < N; i++) {
+ min = min > c[i] ? c[i] : min;
+ }
+
+ /* check results: */
+ if (diff != DIFF)
+ abort ();
+ if (max != max_result)
+ abort ();
+ if (min != 0)
+ abort ();
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (100, 100);
+ main1 (0, 15);
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c
index cb29357..bc87a5c 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-1.c
@@ -47,9 +47,9 @@ int main (void)
{
check_vect ();
- return main1 (100, 100);
- return main1 (0, 15);
+ main1 (100, 100);
+ main1 (0, 15);
}
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail {! vect_reduction} } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-1char.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-1char.c
new file mode 100644
index 0000000..e85fa4a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-1char.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+#define DIFF 242
+
+int main1 (unsigned char x, unsigned char max_result)
+{
+ int i;
+ unsigned char ub[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+ unsigned char uc[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ unsigned char udiff = 2;
+ unsigned char umax = x;
+ unsigned char umin = 10;
+
+ for (i = 0; i < N; i++) {
+ udiff += (unsigned char)(ub[i] - uc[i]);
+ }
+
+ for (i = 0; i < N; i++) {
+ umax = umax < uc[i] ? uc[i] : umax;
+ }
+
+ for (i = 0; i < N; i++) {
+ umin = umin > uc[i] ? uc[i] : umin;
+ }
+
+ /* check results: */
+ if (udiff != DIFF)
+ abort ();
+ if (umax != max_result)
+ abort ();
+ if (umin != 0)
+ abort ();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (100, 100);
+ main1 (0, 15);
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-1short.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-1short.c
new file mode 100644
index 0000000..bd116be
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-1short.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+#define DIFF 242
+
+int main1 (unsigned short x, unsigned short max_result)
+{
+ int i;
+ unsigned short ub[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+ unsigned short uc[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ unsigned short udiff = 2;
+ unsigned short umax = x;
+ unsigned short umin = 10;
+
+ for (i = 0; i < N; i++) {
+ udiff += (unsigned short)(ub[i] - uc[i]);
+ }
+
+ for (i = 0; i < N; i++) {
+ umax = umax < uc[i] ? uc[i] : umax;
+ }
+
+ for (i = 0; i < N; i++) {
+ umin = umin > uc[i] ? uc[i] : umin;
+ }
+
+ /* check results: */
+ if (udiff != DIFF)
+ abort ();
+ if (umax != max_result)
+ abort ();
+ if (umin != 0)
+ abort ();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (100, 100);
+ main1 (0, 15);
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-2.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-2.c
index e44d3f3..ca1a3da 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-2.c
@@ -1,11 +1,10 @@
-
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 16
-#define DIFF 242
+#define DIFF 240
/* Test vectorization of reduction of signed-int. */
@@ -14,7 +13,7 @@ int main1 (int x, int max_result)
int i;
int b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
int c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
- int diff = 2;
+ int diff = 0;
int max = x;
int min = 10;
@@ -45,9 +44,10 @@ int main (void)
{
check_vect ();
- return main1 (100, 100);
- return main1 (0, 15);
+ main1 (100, 100);
+ main1 (0, 15);
+ return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail {! vect_reduction} } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c
new file mode 100644
index 0000000..eddc2cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+#define DIFF 121
+
+int main1 (char x, char max_result)
+{
+ int i;
+ char b[N] = {0,2,3,6,8,10,12,14,16,18,20,22,24,26,28,30};
+ char c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ signed char diff = 2;
+ char max = x;
+ char min = 10;
+
+ for (i = 0; i < N; i++) {
+ diff += (b[i] - c[i]);
+ }
+
+ for (i = 0; i < N; i++) {
+ max = max < c[i] ? c[i] : max;
+ }
+
+ for (i = 0; i < N; i++) {
+ min = min > c[i] ? c[i] : min;
+ }
+
+ /* check results: */
+ if (diff != DIFF)
+ abort ();
+ if (max != max_result)
+ abort ();
+ if (min != 0)
+ abort ();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (100, 100);
+ main1 (0, 15);
+ return 0 ;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c
new file mode 100644
index 0000000..f0880aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+#define DIFF 242
+
+int main1 (short x, short max_result)
+{
+ int i;
+ short b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+ short c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ short diff = 2;
+ short max = x;
+ short min = 10;
+
+ for (i = 0; i < N; i++) {
+ diff += (b[i] - c[i]);
+ }
+ for (i = 0; i < N; i++) {
+ max = max < c[i] ? c[i] : max;
+ }
+
+ for (i = 0; i < N; i++) {
+ min = min > c[i] ? c[i] : min;
+ }
+
+ /* check results: */
+ if (diff != DIFF)
+ abort ();
+ if (max != max_result)
+ abort ();
+ if (min != 0)
+ abort ();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (100, 100);
+ main1 (0, 15);
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-3.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-3.c
index 8937254..0011837 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-3.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-3.c
@@ -4,12 +4,11 @@
#include "tree-vect.h"
#define N 16
-#define DIFF 240
/* Test vectorization of reduction of unsigned-int in the presence
of unknown-loop-bound. */
-int main1 (int n)
+int main1 (int n, int res)
{
int i;
unsigned int ub[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
@@ -22,7 +21,7 @@ int main1 (int n)
}
/* check results: */
- if (udiff != DIFF)
+ if (udiff != res)
abort ();
return 0;
@@ -32,9 +31,10 @@ int main (void)
{
check_vect ();
- return main1 (N);
- return main1 (N-1);
+ main1 (N, 240);
+ main1 (N-1, 210);
+ return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail {! vect_reduction} } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c
new file mode 100644
index 0000000..4e4f155
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_float } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+#define DIFF 242
+
+int main1 (float x, float max_result)
+{
+ int i;
+ float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+ float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ float diff = 2;
+ float max = x;
+ float min = 10;
+
+ for (i = 0; i < N; i++) {
+ diff += (b[i] - c[i]);
+ }
+
+ for (i = 0; i < N; i++) {
+ max = max < c[i] ? c[i] : max;
+ }
+
+ for (i = 0; i < N; i++) {
+ min = min > c[i] ? c[i] : min;
+ }
+
+ /* check results: */
+ if (diff != DIFF)
+ abort ();
+ if (max != max_result)
+ abort ();
+ if (min != 0)
+ abort ();
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (100 ,100);
+ main1 (0, 15);
+ return 0;
+}
+
+/* need -ffast-math to vectorize these loops. */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect.exp b/gcc/testsuite/gcc.dg/vect/vect.exp
index 3f52ed6..6ab7e3d 100644
--- a/gcc/testsuite/gcc.dg/vect/vect.exp
+++ b/gcc/testsuite/gcc.dg/vect/vect.exp
@@ -76,7 +76,25 @@ if [istarget "powerpc*-*-*"] {
dg-init
# Main loop.
-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pr*.\[cS\]]] \
+ "" $DEFAULT_VECTCFLAGS
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vect-*.\[cS\]]] \
+ "" $DEFAULT_VECTCFLAGS
+
+#### Tests with special options
+global SAVED_DEFAULT_VECTCFLAGS
+set SAVED_DEFAULT_VECTCFLAGS $DEFAULT_VECTCFLAGS
+
+# -ffast-math tests
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+lappend DEFAULT_VECTCFLAGS "-ffast-math"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-vect*.\[cS\]]] \
+ "" $DEFAULT_VECTCFLAGS
+
+# -ftrapv tests
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+lappend DEFAULT_VECTCFLAGS "-ftrapv"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/trapv-vect*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
# Clean up.
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 4facec6..0378169 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -988,23 +988,6 @@ proc check_effective_target_vect_int_mult { } {
return $et_vect_int_mult_saved
}
-# Return 1 if the target supports vector reduction
-
-proc check_effective_target_vect_reduction { } {
- global et_vect_reduction_saved
-
- if [info exists et_vect_reduction_saved] {
- verbose "check_effective_target_vect_reduction: using cached result" 2
- } else {
- set et_vect_reduction_saved 0
- if { [istarget powerpc*-*-*] } {
- set et_vect_reduction_saved 1
- }
- }
- verbose "check_effective_target_vect_reduction: returning $et_vect_reduction_saved" 2
- return $et_vect_reduction_saved
-}
-
# Return 1 if the target supports atomic operations on "int" and "long".
proc check_effective_target_sync_int_long { } {
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 7fa4350..ee30ccc 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -1692,6 +1692,8 @@ estimate_num_insns_1 (tree *tp, int *walk_subtrees, void *data)
case RSHIFT_EXPR:
case LROTATE_EXPR:
case RROTATE_EXPR:
+ case VEC_LSHIFT_EXPR:
+ case VEC_RSHIFT_EXPR:
case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index 1922be7..04cc8fa 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -1043,6 +1043,8 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
case RSHIFT_EXPR:
case LROTATE_EXPR:
case RROTATE_EXPR:
+ case VEC_LSHIFT_EXPR:
+ case VEC_RSHIFT_EXPR:
case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
case BIT_AND_EXPR:
@@ -1838,6 +1840,8 @@ op_prio (tree op)
case REDUC_MAX_EXPR:
case REDUC_MIN_EXPR:
case REDUC_PLUS_EXPR:
+ case VEC_LSHIFT_EXPR:
+ case VEC_RSHIFT_EXPR:
return 16;
case SAVE_EXPR:
@@ -1925,6 +1929,12 @@ op_symbol (tree op)
case RSHIFT_EXPR:
return ">>";
+ case VEC_LSHIFT_EXPR:
+ return "v<<";
+
+ case VEC_RSHIFT_EXPR:
+ return "v>>";
+
case PLUS_EXPR:
return "+";
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index 2da1ed2..fc75222 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -448,6 +448,7 @@ expand_vector_operations_1 (block_stmt_iterator *bsi)
compute_type = TREE_TYPE (type);
}
+ gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
rhs = expand_vector_operation (bsi, type, compute_type, rhs, code);
if (lang_hooks.types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
*p_rhs = rhs;
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index 2b4d1d7..a4417d4 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -834,6 +834,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ enum machine_mode mode = TYPE_MODE (vectype);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block exit_bb;
@@ -843,15 +844,18 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
block_stmt_iterator exit_bsi;
tree vec_dest;
tree new_temp;
+ tree new_name;
tree epilog_stmt;
tree new_scalar_dest, exit_phi;
- tree bitsize, bitpos;
+ tree bitsize, bitpos, bytesize;
enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
tree scalar_initial_def;
tree vec_initial_def;
tree orig_name;
imm_use_iterator imm_iter;
use_operand_p use_p;
+ bool extract_scalar_result;
+ bool adjust_in_epilog;
/*** 1. Create the reduction def-use cycle ***/
@@ -888,63 +892,214 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
exit_bsi = bsi_start (exit_bb);
- /* 2.2 Create:
- v_out2 = reduc_expr <v_out1>
- s_out3 = extract_field <v_out2, 0> */
+ new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
+ bitsize = TYPE_SIZE (scalar_type);
+ bytesize = TYPE_SIZE_UNIT (scalar_type);
- vec_dest = vect_create_destination_var (scalar_dest, vectype);
- epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
- build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
- new_temp = make_ssa_name (vec_dest, epilog_stmt);
- TREE_OPERAND (epilog_stmt, 0) = new_temp;
- bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+ /* 2.2 Create the reduction code. */
- if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ if (reduc_code < NUM_TREE_CODES)
{
- fprintf (vect_dump, "transform reduction: created epilog code:");
- print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
- }
+ /*** Case 1: Create:
+ v_out2 = reduc_expr <v_out1> */
- new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
- bitsize = TYPE_SIZE (scalar_type);
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "Reduce using direct vector reduction.");
- /* The result is in the low order bits. */
- if (BITS_BIG_ENDIAN)
- bitpos = size_binop (MULT_EXPR,
- bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
- TYPE_SIZE (scalar_type));
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
+ build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
+ new_temp = make_ssa_name (vec_dest, epilog_stmt);
+ TREE_OPERAND (epilog_stmt, 0) = new_temp;
+ bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+
+ extract_scalar_result = true;
+ adjust_in_epilog = true;
+ }
else
- bitpos = bitsize_zero_node;
+ {
+ enum tree_code shift_code;
+ bool have_whole_vector_shift = true;
+ enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); /* CHECKME */
+ int bit_offset;
+ int element_bitsize = tree_low_cst (bitsize, 1);
+ int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+ tree vec_temp;
+
+ /* The result of the reduction is expected to be at the LSB bits
+ of the vector. For big-endian targets this means at the right
+ end of the vector. For little-endian targets this means at the
+ left end of the vector. */
+
+ if (BITS_BIG_ENDIAN
+ && vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
+ shift_code = VEC_RSHIFT_EXPR;
+ else if (!BITS_BIG_ENDIAN
+ && vec_shl_optab->handlers[mode].insn_code != CODE_FOR_nothing)
+ shift_code = VEC_LSHIFT_EXPR;
+ else
+ have_whole_vector_shift = false;
+
+ if (have_whole_vector_shift)
+ {
+ /*** Case 2:
+ for (offset = VS/2; offset >= element_size; offset/=2)
+ {
+ Create: va' = vec_shift <va, offset>
+ Create: va = vop <va, va'>
+ } */
+
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "Reduce using vector shifts");
+
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ new_temp = PHI_RESULT (new_phi);
+
+ for (bit_offset = vec_size_in_bits/2;
+ bit_offset >= element_bitsize;
+ bit_offset /= 2)
+ {
+ tree bitpos = size_int (bit_offset);
+
+ epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
+ build2 (shift_code, vectype, new_temp, bitpos));
+ new_name = make_ssa_name (vec_dest, epilog_stmt);
+ TREE_OPERAND (epilog_stmt, 0) = new_name;
+ bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+
+
+ epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
+ build2 (code, vectype, new_name, new_temp));
+ new_temp = make_ssa_name (vec_dest, epilog_stmt);
+ TREE_OPERAND (epilog_stmt, 0) = new_temp;
+ bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+ }
+
+ extract_scalar_result = true;
+ adjust_in_epilog = true;
+ }
+ else
+ {
+ /*** Case 3:
+ Create: s = init;
+ for (offset=0; offset<vector_size; offset+=element_size)
+ {
+ Create: s' = extract_field <v_out2, offset>
+ Create: s = op <s, s'>
+ } */
+
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "Reduce using scalar code. ");
+
+ vec_temp = PHI_RESULT (new_phi);
+ vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+
+ /* first iteration is peeled out when possible to minimize
+ the number of operations we generate: */
+ if (code == PLUS_EXPR
+ && (integer_zerop (scalar_initial_def)
+ || real_zerop (scalar_initial_def)))
+ {
+ epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
+ build3 (BIT_FIELD_REF, scalar_type,
+ vec_temp, bitsize, bitsize_zero_node));
+ new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+ TREE_OPERAND (epilog_stmt, 0) = new_temp;
+ bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+
+ bit_offset = element_bitsize;
+ }
+ else
+ {
+ new_temp = scalar_initial_def;
+ bit_offset = 0;
+ }
- epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
- build3 (BIT_FIELD_REF, scalar_type,
- new_temp, bitsize, bitpos));
- new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
- TREE_OPERAND (epilog_stmt, 0) = new_temp;
- bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+ for (;
+ bit_offset < vec_size_in_bits;
+ bit_offset += element_bitsize)
+ {
+ tree bitpos = bitsize_int (bit_offset);
+
+ epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
+ build3 (BIT_FIELD_REF, scalar_type,
+ vec_temp, bitsize, bitpos));
+ new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
+ TREE_OPERAND (epilog_stmt, 0) = new_name;
+ bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+
+
+ epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
+ build2 (code, scalar_type, new_name, new_temp));
+ new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+ TREE_OPERAND (epilog_stmt, 0) = new_temp;
+ bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+ }
+
+ extract_scalar_result = false;
+ adjust_in_epilog = false;
+ }
+ }
- if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
- print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+ /* 2.3 Extract the final scalar result. Create:
+ s_out3 = extract_field <v_out2, bitpos> */
- /* 2.3 Adjust the final result by the initial value of the reduction
- variable. (when such adjustment is not needed, then
- 'scalar_initial_def' is zero).
+ if (extract_scalar_result)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "extract scalar result");
- Create:
- s_out = scalar_expr <s_out, scalar_initial_def> */
+ /* The result is in the low order bits. */
+ if (BITS_BIG_ENDIAN)
+ bitpos = size_binop (MULT_EXPR,
+ bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
+ TYPE_SIZE (scalar_type));
+ else
+ bitpos = bitsize_zero_node;
+
+ epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
+ build3 (BIT_FIELD_REF, scalar_type,
+ new_temp, bitsize, bitpos));
+ new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+ TREE_OPERAND (epilog_stmt, 0) = new_temp;
+ bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+ }
- epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
- build2 (code, scalar_type, new_temp, scalar_initial_def));
- new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
- TREE_OPERAND (epilog_stmt, 0) = new_temp;
- bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
- if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
- print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+ /* 2.4 Adjust the final result by the initial value of the reduction
+ variable. (when such adjustment is not needed, then
+ 'scalar_initial_def' is zero).
-
- /* 2.4 Replace uses of s_out0 with uses of s_out3 */
+ Create:
+ s_out = scalar_expr <s_out, scalar_initial_def> */
+
+ if (adjust_in_epilog)
+ {
+ epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
+ build2 (code, scalar_type, new_temp, scalar_initial_def));
+ new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+ TREE_OPERAND (epilog_stmt, 0) = new_temp;
+ bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
+
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
+ }
+
+
+ /* 2.5 Replace uses of s_out0 with uses of s_out3 */
/* Find the loop-closed-use at the loop exit of the original
scalar result. (The reduction result is expected to have
@@ -954,10 +1109,10 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
{
if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
- {
- exit_phi = USE_STMT (use_p);
- break;
- }
+ {
+ exit_phi = USE_STMT (use_p);
+ break;
+ }
}
orig_name = PHI_RESULT (exit_phi);
@@ -1067,13 +1222,13 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "no optab for reduction.");
- return false;
+ reduc_code = NUM_TREE_CODES;
}
if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
{
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
- fprintf (vect_dump, "op not supported by target.");
- return false;
+ fprintf (vect_dump, "reduc op not supported by target.");
+ reduc_code = NUM_TREE_CODES;
}
if (!vec_stmt) /* transformation not required. */
diff --git a/gcc/tree.def b/gcc/tree.def
index 2b8c280..26a8703 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -957,6 +957,12 @@ DEFTREECODE (REDUC_MAX_EXPR, "reduc_max_expr", tcc_unary, 1)
DEFTREECODE (REDUC_MIN_EXPR, "reduc_min_expr", tcc_unary, 1)
DEFTREECODE (REDUC_PLUS_EXPR, "reduc_plus_expr", tcc_unary, 1)
+/* Whole vector left/right shift in bits.
+ Operand 0 is a vector to be shifted.
+ Operand 1 is an integer shift amount in bits. */
+DEFTREECODE (VEC_LSHIFT_EXPR, "vec_lshift_expr", tcc_binary, 2)
+DEFTREECODE (VEC_RSHIFT_EXPR, "vec_rshift_expr", tcc_binary, 2)
+
/*
Local variables:
mode:c