about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 35
-rw-r--r-- gcc/testsuite/ChangeLog 8
-rw-r--r-- gcc/testsuite/gcc.dg/vect/no-scevccp-outer-2.c 4
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-double-reduc-1.c 56
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-double-reduc-2.c 56
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-double-reduc-3.c 67
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-double-reduc-4.c 56
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c 58
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-double-reduc-6.c 50
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-double-reduc-7.c 65
-rw-r--r-- gcc/tree-parloops.c 4
-rw-r--r-- gcc/tree-vect-loop.c 493
-rw-r--r-- gcc/tree-vect-stmts.c 88
-rw-r--r-- gcc/tree-vectorizer.h 3
14 files changed, 893 insertions, 150 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c8a3950..edeb049 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,38 @@
+2009-07-12 Ira Rosen <irar@il.ibm.com>
+
+ * tree-parloops.c (loop_parallel_p): Call vect_is_simple_reduction
+ with additional argument.
+ * tree-vectorizer.h (enum vect_def_type): Add
+ vect_double_reduction_def.
+ (vect_is_simple_reduction): Add argument.
+ * tree-vect-loop.c (vect_determine_vectorization_factor): Fix
+ indentation.
+ (vect_analyze_scalar_cycles_1): Detect double reduction. Call
+ vect_is_simple_reduction with additional argument.
+ (vect_analyze_loop_operations): Handle exit phi nodes in case of
+ double reduction.
+ (reduction_code_for_scalar_code): Handle additional codes by
+ returning ERROR_MARK for them. Fix comment and indentation.
+ (vect_is_simple_reduction): Fix comment, add argument to specify
+ double reduction. Detect double reduction.
+ (get_initial_def_for_induction): Fix indentation.
+ (get_initial_def_for_reduction): Fix comment and indentation.
+ Handle double reduction. Create initial definitions that do not
+ require adjustment if ADJUSTMENT_DEF is NULL. Handle additional cases.
+ (vect_create_epilog_for_reduction): Fix comment, add argument to
+ handle double reduction. Use PLUS_EXPR in case of MINUS_EXPR in
+ epilogue result extraction. Create double reduction phi node and
+ replace relevant uses.
+ (vectorizable_reduction): Call vect_is_simple_reduction with
+ additional argument. Fix indentation. Update epilogue code treatment
+ according to the changes in reduction_code_for_scalar_code. Check
+ for double reduction. Call vect_create_epilog_for_reduction with
+ additional argument.
+ * tree-vect-stmts.c (process_use): Handle double reduction, update
+ documentation.
+ (vect_mark_stmts_to_be_vectorized): Handle double reduction.
+ (vect_get_vec_def_for_operand): Likewise.
+
2009-07-12 Danny Smith <dansmister@gmail.com>
* config/i386/winnt.c (i386_pe_determine_dllexport_p): Don't
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 06d7675..7df599e 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2009-07-12 Ira Rosen <irar@il.ibm.com>
+
+ * gcc.dg/vect/no-scevccp-outer-2.c: Expect to vectorize.
+ * gcc.dg/vect/vect-double-reduc-1.c, gcc.dg/vect/vect-double-reduc-2.c,
+ gcc.dg/vect/vect-double-reduc-3.c, gcc.dg/vect/vect-double-reduc-4.c,
+ gcc.dg/vect/vect-double-reduc-5.c, gcc.dg/vect/vect-double-reduc-6.c,
+ gcc.dg/vect/vect-double-reduc-7.c: New tests.
+
2009-07-12 Hans-Peter Nilsson <hp@axis.com>
* gfortran.dg/f2003_io_4.f03, gfortran.dg/read_size_noadvance.f90,
diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-2.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-2.c
index a9ac09c..13b3788 100644
--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-2.c
+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-2.c
@@ -1,4 +1,6 @@
/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
#define N 40
int
@@ -14,5 +16,5 @@ foo (){
return diff;
}
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-1.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-1.c
new file mode 100644
index 0000000..e335842
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-1.c
@@ -0,0 +1,56 @@
+/* { dg-require-effective-target vect_int_mult } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define K 32
+
+int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int out[K];
+int check_result[K] = {642816,660736,678656,696576,714496,732416,750336,768256,786176,804096,822016,839936,857856,875776,893696,911616,929536,947456,965376,983296,1001216,1019136,1037056,1054976,1072896,1090816,1108736,1126656,1144576,1162496,1180416,1198336};
+
+__attribute__ ((noinline)) void
+foo ()
+{
+ int sum = 0, i, j, k;
+
+ for (k = 0; k < K; k++)
+ {
+ sum = 0;
+ for (j = 0; j < K; j++)
+ for (i = 0; i < K; i++)
+ sum += in[i+k][j] * coeff[i][j];
+
+ out[k] = sum;
+ }
+}
+
+int main ()
+{
+ int i, j, k;
+
+ check_vect ();
+
+ for (j = 0; j < K; j++)
+ {
+ for (i = 0; i < 2*K; i++)
+ in[i][j] = i+j;
+
+ for (i = 0; i < K; i++)
+ coeff[i][j] = i+2;
+ }
+
+ foo();
+
+ for (k = 0; k < K; k++)
+ if (out[k] != check_result[k])
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-2.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-2.c
new file mode 100644
index 0000000..be469be
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-2.c
@@ -0,0 +1,56 @@
+/* { dg-require-effective-target vect_int_mult } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define K 32
+
+int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int out[K];
+int check_result[K] = {357184,339264,321344,303424,285504,267584,249664,231744,213824,195904,177984,160064,142144,124224,106304,88384,70464,52544,34624,16704,-1216,-19136,-37056,-54976,-72896,-90816,-108736,-126656,-144576,-162496,-180416,-198336};
+
+__attribute__ ((noinline)) void
+foo ()
+{
+ int res = 0, i, j, k;
+
+ for (k = 0; k < K; k++)
+ {
+ res = 1000000;
+ for (j = 0; j < K; j++)
+ for (i = 0; i < K; i++)
+ res -= in[i+k][j] * coeff[i][j];
+
+ out[k] = res;
+ }
+}
+
+int main ()
+{
+ int i, j, k;
+
+ check_vect ();
+
+ for (j = 0; j < K; j++)
+ {
+ for (i = 0; i < 2*K; i++)
+ in[i][j] = i+j;
+
+ for (i = 0; i < K; i++)
+ coeff[i][j] = i+2;
+ }
+
+ foo();
+
+ for (k = 0; k < K; k++)
+ if (out[k] != check_result[k])
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-3.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-3.c
new file mode 100644
index 0000000..87b5a04
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-3.c
@@ -0,0 +1,67 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define K 32
+
+int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int out_max[K], out_min[K];
+int check_max[K] = {62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93};
+int check_min[K] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
+
+__attribute__ ((noinline)) void
+foo (int x, int y)
+{
+ int max, min, i, j, k;
+
+ for (k = 0; k < K; k++)
+ {
+ max = x;
+ min = y;
+ for (j = 0; j < K; j++)
+ for (i = 0; i < K; i++)
+ {
+ max = max < in[i+k][j] ? in[i+k][j] : max;
+ min = min > in[i+k][j] ? in[i+k][j] : min;
+ }
+ out_max[k] = max;
+ out_min[k] = min;
+ }
+}
+
+int main ()
+{
+ int i, j, k;
+
+ check_vect ();
+
+ for (j = 0; j < K; j++)
+ {
+ for (i = 0; i < 2*K; i++)
+ in[i][j] = i+j;
+
+ for (i = 0; i < K; i++)
+ coeff[i][j] = i+2;
+ }
+
+ foo(0, 0);
+
+ for (k = 0; k < K; k++)
+ if (out_max[k] != check_max[k] || out_min[k] != 0)
+ abort ();
+
+ foo(100, 45);
+
+ for (k = 0; k < K; k++)
+ if (out_min[k] != check_min[k] || out_max[k] != 100)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_int_max } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-4.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-4.c
new file mode 100644
index 0000000..90e0da7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-4.c
@@ -0,0 +1,56 @@
+/* { dg-require-effective-target vect_int_mult } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define K 32
+
+int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int out[K];
+int check_result[K] = {652816,670736,688656,706576,724496,742416,760336,778256,796176,814096,832016,849936,867856,885776,903696,921616,939536,957456,975376,993296,1011216,1029136,1047056,1064976,1082896,1100816,1118736,1136656,1154576,1172496,1190416,1208336};
+
+__attribute__ ((noinline)) void
+foo ()
+{
+ int sum = 0, i, j, k;
+
+ for (k = 0; k < K; k++)
+ {
+ sum = 10000;
+ for (j = 0; j < K; j++)
+ for (i = 0; i < K; i++)
+ sum += in[i+k][j] * coeff[i][j];
+
+ out[k] = sum;
+ }
+}
+
+int main ()
+{
+ int i, j, k;
+
+ check_vect ();
+
+ for (j = 0; j < K; j++)
+ {
+ for (i = 0; i < 2*K; i++)
+ in[i][j] = i+j;
+
+ for (i = 0; i < K; i++)
+ coeff[i][j] = i+2;
+ }
+
+ foo();
+
+ for (k = 0; k < K; k++)
+ if (out[k] != check_result[k])
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c
new file mode 100644
index 0000000..f624d86
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c
@@ -0,0 +1,58 @@
+/* { dg-require-effective-target vect_int_mult } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define K 32
+
+signed short in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+signed short coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int out[K];
+int check_result[K] = {642816,660736,678656,696576,714496,732416,750336,768256,786176,804096,822016,839936,857856,875776,893696,911616,929536,947456,965376,983296,1001216,1019136,1037056,1054976,1072896,1090816,1108736,1126656,1144576,1162496,1180416,1198336};
+
+__attribute__ ((noinline)) void
+foo ()
+{
+ int sum = 0, i, j, k;
+
+ for (k = 0; k < K; k++)
+ {
+ sum = 0;
+ for (j = 0; j < K; j++)
+ for (i = 0; i < K; i++)
+ sum += in[i+k][j] * coeff[i][j];
+
+ out[k] = sum;
+ }
+}
+
+int main ()
+{
+ int i, j, k;
+
+ check_vect ();
+
+ for (j = 0; j < K; j++)
+ {
+ for (i = 0; i < 2*K; i++)
+ in[i][j] = i+j;
+
+ for (i = 0; i < K; i++)
+ coeff[i][j] = i+2;
+ }
+
+ foo();
+
+ for (k = 0; k < K; k++)
+ if (out[k] != check_result[k])
+ abort ();
+
+ return 0;
+}
+
+/* Vectorization of loops with multiple types and double reduction is not
+ supported yet. */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-6.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-6.c
new file mode 100644
index 0000000..f52b32b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-6.c
@@ -0,0 +1,50 @@
+/* { dg-require-effective-target vect_int_mult } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define K 4
+
+int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int out[K];
+int check_result[K] = {0,16,256,4096};
+
+__attribute__ ((noinline)) void
+foo ()
+{
+ int sum;
+ int i, j, k;
+
+ for (k = 0; k < K; k++)
+ {
+ sum = 1;
+ for (j = 0; j < K; j++)
+ for (i = 0; i < K; i++)
+ sum *= in[i+k][j];
+ out[k] = sum;
+ }
+}
+
+int main ()
+{
+ int i, j, k;
+
+ check_vect ();
+
+ for (i = 0; i < 2*K; i++)
+ for (j = 0; j < K; j++)
+ in[i][j] = (i+2)/3;
+
+ foo();
+
+ for (k = 0; k < K; k++)
+ if (out[k] != check_result[k])
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-7.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-7.c
new file mode 100644
index 0000000..9e7ced7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-7.c
@@ -0,0 +1,65 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define K 32
+
+int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+int out[K];
+int check_result[K] = {63,63,191,191,127,127,191,191,127,127,191,191,127,127,191,191,127,127,191,191,127,127,191,191,127,127,191,191,127,127,191,191};
+
+__attribute__ ((noinline)) void
+foo ()
+{
+ int res_or, res_and, res_xor, i, j, k;
+
+ for (k = 0; k < K; k++)
+ {
+ res_or = 0;
+ for (j = 0; j < K; j++)
+ for (i = 0; i < K; i++)
+ res_or = res_or | in[i+k][j];
+
+ res_and = 1;
+ for (j = 0; j < K; j++)
+ for (i = 0; i < K; i++)
+ res_and = res_and & in[i+k][j];
+
+ res_xor = 0;
+ for (j = 0; j < K; j++)
+ for (i = 0; i < K; i++)
+ res_xor = res_xor ^ in[i+k][j];
+
+ out[k] = res_or + res_and + res_xor;
+ }
+}
+
+int main ()
+{
+ int i, j, k;
+
+ check_vect ();
+
+ for (j = 0; j < K; j++)
+ {
+ for (i = 0; i < 2*K; i++)
+ in[i][j] = i+j;
+
+ for (i = 0; i < K; i++)
+ out[i] = i+j;
+ }
+
+ foo();
+
+ for (k = 0; k < K; k++)
+ if (out[k] != check_result[k])
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 3 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 5f11fc7..28c96a2 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -284,13 +284,15 @@ loop_parallel_p (struct loop *loop, htab_t reduction_list,
{
gimple phi = gsi_stmt (gsi);
gimple reduc_stmt = NULL;
+ bool dummy;
/* ??? TODO: Change this into a generic function that
recognizes reductions. */
if (!is_gimple_reg (PHI_RESULT (phi)))
continue;
if (simple_loop_info)
- reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true);
+ reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true,
+ &dummy);
/* Create a reduction_info struct, initialize it and insert it to
the reduction list. */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index a37e3c0..c96fb04 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -291,8 +291,7 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
}
else
{
-
- gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
+ gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)
&& !is_pattern_stmt_p (stmt_info));
scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
@@ -410,6 +409,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
tree dumy;
VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
gimple_stmt_iterator gsi;
+ bool double_reduc;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
@@ -477,26 +477,39 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
- reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi, !nested_cycle);
+ reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi, !nested_cycle,
+ &double_reduc);
if (reduc_stmt)
{
- if (nested_cycle)
+ if (double_reduc)
{
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "Detected vectorizable nested cycle.");
+ fprintf (vect_dump, "Detected double reduction.");
- STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
+ STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
- vect_nested_cycle;
+ vect_double_reduction_def;
}
- else
+ else
{
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "Detected reduction.");
+ if (nested_cycle)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "Detected vectorizable nested cycle.");
- STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
- STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
- vect_reduction_def;
+ STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
+ STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
+ vect_nested_cycle;
+ }
+ else
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "Detected reduction.");
+
+ STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
+ STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
+ vect_reduction_def;
+ }
}
}
else
@@ -1111,10 +1124,13 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
/* inner-loop loop-closed exit phi in outer-loop vectorization
(i.e. a phi in the tail of the outer-loop).
FORNOW: we currently don't support the case that these phis
- are not used in the outerloop, cause this case requires
- to actually do something here. */
- if (!STMT_VINFO_RELEVANT_P (stmt_info)
- || STMT_VINFO_LIVE_P (stmt_info))
+ are not used in the outerloop (unless it is double reduction,
+ i.e., this phi is vect_double_reduction_def), because this
+ case requires us to actually do something here. */
+ if ((!STMT_VINFO_RELEVANT_P (stmt_info)
+ || STMT_VINFO_LIVE_P (stmt_info))
+ && STMT_VINFO_DEF_TYPE (stmt_info)
+ != vect_double_reduction_def)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump,
@@ -1466,31 +1482,40 @@ vect_analyze_loop (struct loop *loop)
Output:
REDUC_CODE - the corresponding tree-code to be used to reduce the
vector of partial results into a single scalar result (which
- will also reside in a vector).
+ will also reside in a vector) or ERROR_MARK if the operation is
+ a supported reduction operation, but does not have such tree-code.
- Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise. */
+ Return FALSE if CODE currently cannot be vectorized as reduction. */
static bool
reduction_code_for_scalar_code (enum tree_code code,
enum tree_code *reduc_code)
{
switch (code)
- {
- case MAX_EXPR:
- *reduc_code = REDUC_MAX_EXPR;
- return true;
-
- case MIN_EXPR:
- *reduc_code = REDUC_MIN_EXPR;
- return true;
-
- case PLUS_EXPR:
- *reduc_code = REDUC_PLUS_EXPR;
- return true;
-
- default:
- return false;
- }
+ {
+ case MAX_EXPR:
+ *reduc_code = REDUC_MAX_EXPR;
+ return true;
+
+ case MIN_EXPR:
+ *reduc_code = REDUC_MIN_EXPR;
+ return true;
+
+ case PLUS_EXPR:
+ *reduc_code = REDUC_PLUS_EXPR;
+ return true;
+
+ case MULT_EXPR:
+ case MINUS_EXPR:
+ case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ case BIT_AND_EXPR:
+ *reduc_code = ERROR_MARK;
+ return true;
+
+ default:
+ return false;
+ }
}
@@ -1507,7 +1532,7 @@ report_vect_op (gimple stmt, const char *msg)
/* Function vect_is_simple_reduction
- Detect a cross-iteration def-use cycle that represents a simple
+ (1) Detect a cross-iteration def-use cycle that represents a simple
reduction computation. We look for the following pattern:
loop_header:
@@ -1524,12 +1549,20 @@ report_vect_op (gimple stmt, const char *msg)
Condition 1 is tested here.
Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
- Also detect a cross-iteration def-use cycle in nested loops, i.e., nested
- cycles, if CHECK_REDUCTION is false. */
+ (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
+ nested cycles, if CHECK_REDUCTION is false.
+
+ (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
+ reductions:
+
+ a1 = phi < a0, a2 >
+ inner loop (def of a3)
+ a2 = phi < a3 >
+*/
gimple
vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
- bool check_reduction)
+ bool check_reduction, bool *double_reduc)
{
struct loop *loop = (gimple_bb (phi))->loop_father;
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
@@ -1543,6 +1576,9 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
tree name;
imm_use_iterator imm_iter;
use_operand_p use_p;
+ bool phi_def;
+
+ *double_reduc = false;
/* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
otherwise, we assume outer loop vectorization. */
@@ -1584,14 +1620,24 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
return NULL;
}
- if (!is_gimple_assign (def_stmt))
+ if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
{
if (vect_print_dump_info (REPORT_DETAILS))
print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
return NULL;
}
- name = gimple_assign_lhs (def_stmt);
+ if (is_gimple_assign (def_stmt))
+ {
+ name = gimple_assign_lhs (def_stmt);
+ phi_def = false;
+ }
+ else
+ {
+ name = PHI_RESULT (def_stmt);
+ phi_def = true;
+ }
+
nloop_uses = 0;
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
{
@@ -1608,6 +1654,37 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
}
}
+ /* If DEF_STMT is a phi node itself, we expect it to have a single argument
+ defined in the inner loop. */
+ if (phi_def)
+ {
+ op1 = PHI_ARG_DEF (def_stmt, 0);
+
+ if (gimple_phi_num_args (def_stmt) != 1
+ || TREE_CODE (op1) != SSA_NAME)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "unsupported phi node definition.");
+
+ return NULL;
+ }
+
+ def1 = SSA_NAME_DEF_STMT (op1);
+ if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
+ && loop->inner
+ && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
+ && is_gimple_assign (def1))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ report_vect_op (def_stmt, "detected double reduction: ");
+
+ *double_reduc = true;
+ return def_stmt;
+ }
+
+ return NULL;
+ }
+
code = gimple_assign_rhs_code (def_stmt);
if (check_reduction
@@ -1697,7 +1774,6 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
return NULL;
}
-
/* Check that one def is the reduction def, defined by PHI,
the other def is either defined in the loop ("vect_internal_def"),
or it's an induction (defined by a loop-header phi-node). */
@@ -2296,7 +2372,7 @@ get_initial_def_for_induction (gimple iv_phi)
access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
gcc_assert (access_fn);
ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
- &init_expr, &step_expr);
+ &init_expr, &step_expr);
gcc_assert (ok);
pe = loop_preheader_edge (iv_loop);
@@ -2306,7 +2382,8 @@ get_initial_def_for_induction (gimple iv_phi)
/* iv_loop is nested in the loop to be vectorized. init_expr had already
been created during vectorization of previous stmts; We obtain it from
the STMT_VINFO_VEC_STMT of the defining stmt. */
- tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop));
+ tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
+ loop_preheader_edge (iv_loop));
vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
}
else
@@ -2507,18 +2584,16 @@ get_initial_def_for_induction (gimple iv_phi)
vector of partial results.
Option1 (adjust in epilog): Initialize the vector as follows:
- add: [0,0,...,0,0]
- mult: [1,1,...,1,1]
- min/max: [init_val,init_val,..,init_val,init_val]
- bit and/or: [init_val,init_val,..,init_val,init_val]
+ add/bit or/xor: [0,0,...,0,0]
+ mult/bit and: [1,1,...,1,1]
+ min/max: [init_val,init_val,..,init_val,init_val]
and when necessary (e.g. add/mult case) let the caller know
that it needs to adjust the result by init_val.
Option2: Initialize the vector as follows:
- add: [0,0,...,0,init_val]
- mult: [1,1,...,1,init_val]
- min/max: [init_val,init_val,...,init_val]
- bit and/or: [init_val,init_val,...,init_val]
+ add/bit or/xor: [init_val,0,0,...,0]
+ mult/bit and: [init_val,1,1,...,1]
+ min/max: [init_val,init_val,...,init_val]
and no adjustments are needed.
For example, for the following code:
@@ -2533,11 +2608,14 @@ get_initial_def_for_induction (gimple iv_phi)
the result at the end by 'init_val'.
FORNOW, we are using the 'adjust in epilog' scheme, because this way the
- initialization vector is simpler (same element in all entries).
+ initialization vector is simpler (same element in all entries), if
+ ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
+
A cost model should help decide between these two schemes. */
tree
-get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def)
+get_initial_def_for_reduction (gimple stmt, tree init_val,
+ tree *adjustment_def)
{
stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
@@ -2551,47 +2629,118 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def)
tree t = NULL_TREE;
int i;
bool nested_in_vect_loop = false;
+ tree init_value;
+ REAL_VALUE_TYPE real_init_val = dconst0;
+ int int_init_val = 0;
gcc_assert (vectype);
nunits = TYPE_VECTOR_SUBPARTS (vectype);
gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
|| SCALAR_FLOAT_TYPE_P (scalar_type));
+
if (nested_in_vect_loop_p (loop, stmt))
nested_in_vect_loop = true;
else
gcc_assert (loop == (gimple_bb (stmt))->loop_father);
- switch (code)
- {
- case WIDEN_SUM_EXPR:
- case DOT_PROD_EXPR:
- case PLUS_EXPR:
- case MINUS_EXPR:
- if (nested_in_vect_loop)
- *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
- else
- *adjustment_def = init_val;
- /* Create a vector of zeros for init_def. */
- if (SCALAR_FLOAT_TYPE_P (scalar_type))
- def_for_init = build_real (scalar_type, dconst0);
- else
- def_for_init = build_int_cst (scalar_type, 0);
-
- for (i = nunits - 1; i >= 0; --i)
- t = tree_cons (NULL_TREE, def_for_init, t);
- init_def = build_vector (vectype, t);
- break;
+ /* In case of double reduction we only create a vector variable to be put
+ in the reduction phi node. The actual statement creation is done in
+ vect_create_epilog_for_reduction. */
+ if (TREE_CODE (init_val) == SSA_NAME
+ && vinfo_for_stmt (SSA_NAME_DEF_STMT (init_val))
+ && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SSA_NAME_DEF_STMT (init_val)))
+ == vect_double_reduction_def)
+ {
+ *adjustment_def = NULL;
+ return vect_create_destination_var (init_val, vectype);
+ }
- case MIN_EXPR:
- case MAX_EXPR:
- *adjustment_def = NULL_TREE;
- init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
- break;
+ if (TREE_CONSTANT (init_val))
+ {
+ if (SCALAR_FLOAT_TYPE_P (scalar_type))
+ init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
+ else
+ init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
+ }
+ else
+ init_value = init_val;
- default:
- gcc_unreachable ();
- }
+ switch (code)
+ {
+ case WIDEN_SUM_EXPR:
+ case DOT_PROD_EXPR:
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ case MULT_EXPR:
+ case BIT_AND_EXPR:
+ /* ADJUSTMENT_DEF is NULL when called from
+ vect_create_epilog_for_reduction to vectorize double reduction. */
+ if (adjustment_def)
+ {
+ if (nested_in_vect_loop)
+ *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
+ NULL);
+ else
+ *adjustment_def = init_val;
+ }
+
+ if (code == MULT_EXPR || code == BIT_AND_EXPR)
+ {
+ real_init_val = dconst1;
+ int_init_val = 1;
+ }
+
+ if (SCALAR_FLOAT_TYPE_P (scalar_type))
+ def_for_init = build_real (scalar_type, real_init_val);
+ else
+ def_for_init = build_int_cst (scalar_type, int_init_val);
+
+ /* Create a vector of '0' or '1' except the first element. */
+ for (i = nunits - 2; i >= 0; --i)
+ t = tree_cons (NULL_TREE, def_for_init, t);
+
+ /* Option1: the first element is '0' or '1' as well. */
+ if (adjustment_def)
+ {
+ t = tree_cons (NULL_TREE, def_for_init, t);
+ init_def = build_vector (vectype, t);
+ break;
+ }
+
+ /* Option2: the first element is INIT_VAL. */
+ t = tree_cons (NULL_TREE, init_value, t);
+ if (TREE_CONSTANT (init_val))
+ init_def = build_vector (vectype, t);
+ else
+ init_def = build_constructor_from_list (vectype, t);
+
+ break;
+
+ case MIN_EXPR:
+ case MAX_EXPR:
+ if (adjustment_def)
+ {
+ *adjustment_def = NULL_TREE;
+ init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
+ break;
+ }
+
+ for (i = nunits - 1; i >= 0; --i)
+ t = tree_cons (NULL_TREE, init_value, t);
+
+ if (TREE_CONSTANT (init_val))
+ init_def = build_vector (vectype, t);
+ else
+ init_def = build_constructor_from_list (vectype, t);
+
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
return init_def;
}
@@ -2613,6 +2762,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def)
REDUCTION_PHI is the phi-node that carries the reduction computation.
REDUC_INDEX is the index of the operand in the right hand side of the
statement that is defined by REDUCTION_PHI.
+ DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
This function:
1. Creates the reduction def-use cycle: sets the arguments for
@@ -2657,14 +2807,15 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
int ncopies,
enum tree_code reduc_code,
gimple reduction_phi,
- int reduc_index)
+ int reduc_index,
+ bool double_reduc)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
stmt_vec_info prev_phi_info;
tree vectype;
enum machine_mode mode;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
basic_block exit_bb;
tree scalar_dest;
tree scalar_type;
@@ -2694,6 +2845,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
if (nested_in_vect_loop_p (loop, stmt))
{
+ outer_loop = loop;
loop = loop->inner;
nested_in_vect_loop = true;
}
@@ -2726,7 +2878,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
the scalar def before the loop, that defines the initial value
of the reduction variable. */
vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
- &adjustment_def);
+ &adjustment_def);
phi = reduction_phi;
def = vect_def;
@@ -2744,8 +2896,8 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
{
fprintf (vect_dump, "transform reduction: created def-use cycle: ");
print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
- fprintf (vect_dump, "\n");
- print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0, TDF_SLIM);
+ fprintf (vect_dump, "\n");
+ print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0, TDF_SLIM);
}
phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
@@ -2831,15 +2983,25 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
bitsize = TYPE_SIZE (scalar_type);
bytesize = TYPE_SIZE_UNIT (scalar_type);
+ /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
+ partial results are added and not subtracted. */
+ if (code == MINUS_EXPR)
+ code = PLUS_EXPR;
/* In case this is a reduction in an inner-loop while vectorizing an outer
loop - we don't need to extract a single scalar result at the end of the
- inner-loop. The final vector of partial results will be used in the
- vectorized outer-loop, or reduced to a scalar result at the end of the
- outer-loop. */
- if (nested_in_vect_loop)
+ inner-loop (unless it is double reduction, i.e., the use of reduction is
+ outside the outer-loop). The final vector of partial results will be used
+ in the vectorized outer-loop, or reduced to a scalar result at the end of
+ the outer-loop. */
+ if (nested_in_vect_loop && !double_reduc)
goto vect_finalize_reduction;
+ /* The epilogue is created for the outer-loop, i.e., for the loop being
+ vectorized. */
+ if (double_reduc)
+ loop = outer_loop;
+
/* FORNOW */
gcc_assert (ncopies == 1);
@@ -2914,6 +3076,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
bit_offset /= 2)
{
tree bitpos = size_int (bit_offset);
+
epilog_stmt = gimple_build_assign_with_ops (shift_code, vec_dest,
new_temp, bitpos);
new_name = make_ssa_name (vec_dest, epilog_stmt);
@@ -2987,7 +3150,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
{
tree rhs;
- gcc_assert (!nested_in_vect_loop);
+ gcc_assert (!nested_in_vect_loop || double_reduc);
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "extract scalar result");
@@ -3007,6 +3170,9 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
vect_finalize_reduction:
+ if (double_reduc)
+ loop = loop->inner;
+
/* 2.5 Adjust the final result by the initial value of the reduction
variable. (When such adjustment is not needed, then
'adjustment_def' is zero). For example, if code is PLUS we create:
@@ -3016,11 +3182,6 @@ vect_finalize_reduction:
{
if (nested_in_vect_loop)
{
- /* For MINUS_EXPR we create new_temp = loop_exit_def + adjustment_def
- since the initial value is [0,0,...,0]. */
- if (code == MINUS_EXPR)
- code = PLUS_EXPR;
-
gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
new_dest = vect_create_destination_var (scalar_dest, vectype);
@@ -3055,6 +3216,7 @@ vect_finalize_reduction:
VEC_quick_push (gimple, phis, exit_phi);
}
}
+
/* We expect to have found an exit_phi because of loop-closed-ssa form. */
gcc_assert (!VEC_empty (gimple, phis));
@@ -3063,12 +3225,13 @@ vect_finalize_reduction:
if (nested_in_vect_loop)
{
stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
+ gimple vect_phi;
/* FORNOW. Currently not supporting the case that an inner-loop
reduction is not used in the outer-loop (but only outside the
- outer-loop). */
- gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
- && !STMT_VINFO_LIVE_P (stmt_vinfo));
+ outer-loop), unless it is double reduction. */
+ gcc_assert ((STMT_VINFO_RELEVANT_P (stmt_vinfo)
+ && !STMT_VINFO_LIVE_P (stmt_vinfo)) || double_reduc);
epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
@@ -3078,7 +3241,88 @@ vect_finalize_reduction:
if (adjustment_def)
STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
- continue;
+
+ if (!double_reduc
+ || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_double_reduction_def)
+ continue;
+
+ /* Handle double reduction:
+
+ stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop)
+ stmt2: s3 = phi <s1, s4> - (regular) reduction phi (inner loop)
+ stmt3: s4 = use (s3) - (regular) reduction stmt (inner loop)
+ stmt4: s2 = phi <s4> - double reduction stmt (outer loop)
+
+ At this point the regular reduction (stmt2 and stmt3) is already
+ vectorized, as well as the exit phi node, stmt4.
+ Here we vectorize the phi node of the double reduction, stmt1, and
+ update all relevant statements. */
+
+ /* Go through all the uses of s2 to find the double reduction phi node,
+ i.e., stmt1 above. */
+ orig_name = PHI_RESULT (exit_phi);
+ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
+ {
+ stmt_vec_info use_stmt_vinfo = vinfo_for_stmt (use_stmt);
+ stmt_vec_info new_phi_vinfo;
+ tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
+ basic_block bb = gimple_bb (use_stmt);
+ gimple use;
+
+ /* Check that USE_STMT is really a double reduction phi node. */
+ if (gimple_code (use_stmt) != GIMPLE_PHI
+ || gimple_phi_num_args (use_stmt) != 2
+ || !use_stmt_vinfo
+ || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
+ != vect_double_reduction_def
+ || bb->loop_father != outer_loop)
+ continue;
+
+ /* Create vector phi node for double reduction:
+ vs1 = phi <vs0, vs2>
+ vs1 was created previously in this function by a call to
+ vect_get_vec_def_for_operand and is stored in vec_initial_def;
+ vs2 is defined by EPILOG_STMT, the vectorized EXIT_PHI;
+ vs0 is created here. */
+
+ /* Create vector phi node. */
+ vect_phi = create_phi_node (vec_initial_def, bb);
+ new_phi_vinfo = new_stmt_vec_info (vect_phi,
+ loop_vec_info_for_loop (outer_loop), NULL);
+ set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
+
+ /* Create vs0 - initial def of the double reduction phi. */
+ preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
+ loop_preheader_edge (outer_loop));
+ init_def = get_initial_def_for_reduction (stmt, preheader_arg,
+ NULL);
+ vect_phi_init = vect_init_vector (use_stmt, init_def, vectype,
+ NULL);
+
+ /* Update phi node arguments with vs0 and vs2. */
+ add_phi_arg (vect_phi, vect_phi_init,
+ loop_preheader_edge (outer_loop));
+ add_phi_arg (vect_phi, PHI_RESULT (epilog_stmt),
+ loop_latch_edge (outer_loop));
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "created double reduction phi node: ");
+ print_gimple_stmt (vect_dump, vect_phi, 0, TDF_SLIM);
+ }
+
+ vect_phi_res = PHI_RESULT (vect_phi);
+
+ /* Replace the use, i.e., set the correct vs1 in the regular
+ reduction phi node. FORNOW, NCOPIES is always 1, so the loop
+ is redundant. */
+ use = reduction_phi;
+ for (j = 0; j < ncopies; j++)
+ {
+ edge pr_edge = loop_preheader_edge (loop);
+ SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
+ use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
+ }
+ }
}
/* Replace the uses: */
@@ -3087,6 +3331,7 @@ vect_finalize_reduction:
FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
SET_USE (use_p, new_temp);
}
+
VEC_free (gimple, heap, phis);
}
@@ -3171,6 +3416,10 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
gimple reduc_def_stmt = NULL;
/* The default is that the reduction variable is the last in statement. */
int reduc_index = 2;
+ bool double_reduc = false, dummy;
+ basic_block def_bb;
+ struct loop * def_stmt_loop;
+ tree def_arg;
if (nested_in_vect_loop_p (loop, stmt))
{
@@ -3185,7 +3434,6 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
return false;
/* 1. Is vectorizable reduction? */
-
/* Not supportable if the reduction variable is used in the loop. */
if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
return false;
@@ -3300,10 +3548,11 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
if (orig_stmt)
gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
reduc_def_stmt,
- !nested_cycle));
+ !nested_cycle,
+ &dummy));
else
gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
- !nested_cycle));
+ !nested_cycle, &dummy));
if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
return false;
@@ -3400,25 +3649,43 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
orig_code = code;
}
- if (nested_cycle)
- epilog_reduc_code = orig_code;
- else
- if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
- return false;
+ if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
+ return false;
- reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype, optab_default);
+ reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype,
+ optab_default);
if (!reduc_optab)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "no optab for reduction.");
epilog_reduc_code = ERROR_MARK;
}
- if (optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
+
+ if (reduc_optab
+ && optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "reduc op not supported by target.");
epilog_reduc_code = ERROR_MARK;
}
+
+ def_bb = gimple_bb (reduc_def_stmt);
+ def_stmt_loop = def_bb->loop_father;
+ def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
+ loop_preheader_edge (def_stmt_loop));
+ if (TREE_CODE (def_arg) == SSA_NAME
+ && vinfo_for_stmt (SSA_NAME_DEF_STMT (def_arg))
+ && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SSA_NAME_DEF_STMT (def_arg)))
+ == vect_double_reduction_def)
+ double_reduc = true;
+
+ if (double_reduc && ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "multiple types in double reduction");
+
+ return false;
+ }
if (!vec_stmt) /* transformation not required. */
{
@@ -3560,8 +3827,10 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
epilog reduction code. */
if (!single_defuse_cycle)
new_temp = gimple_assign_lhs (*vec_stmt);
+
vect_create_epilog_for_reduction (new_temp, stmt, epilog_copies,
- epilog_reduc_code, first_phi, reduc_index);
+ epilog_reduc_code, first_phi, reduc_index,
+ double_reduc);
return true;
}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 1c9415b..891ee18 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -331,7 +331,7 @@ process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
...
inner-loop:
d = def_stmt
- outer-loop-tail-bb:
+ outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
stmt # use (d) */
else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
{
@@ -341,7 +341,8 @@ process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
switch (relevant)
{
case vect_unused_in_scope:
- relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
+ relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
+ || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
vect_used_in_outer_by_reduction : vect_unused_in_scope;
break;
@@ -393,7 +394,8 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
basic_block bb;
gimple phi;
bool live_p;
- enum vect_relevant relevant;
+ enum vect_relevant relevant, tmp_relevant;
+ enum vect_def_type def_type;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
@@ -465,49 +467,64 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
identify stmts that are used solely by a reduction, and therefore the
order of the results that they produce does not have to be kept. */
- if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
+ def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
+ tmp_relevant = relevant;
+ switch (def_type)
{
- enum vect_relevant tmp_relevant = relevant;
- switch (tmp_relevant)
- {
- case vect_unused_in_scope:
- gcc_assert (gimple_code (stmt) != GIMPLE_PHI);
- relevant = vect_used_by_reduction;
- break;
+ case vect_reduction_def:
+ switch (tmp_relevant)
+ {
+ case vect_unused_in_scope:
+ relevant = vect_used_by_reduction;
+ break;
- case vect_used_by_reduction:
- if (gimple_code (stmt) == GIMPLE_PHI)
- break;
- /* fall through */
+ case vect_used_by_reduction:
+ if (gimple_code (stmt) == GIMPLE_PHI)
+ break;
+ /* fall through */
- default:
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "unsupported use of reduction.");
- VEC_free (gimple, heap, worklist);
- return false;
- }
+ default:
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "unsupported use of reduction.");
- live_p = false;
- }
- else if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle)
- {
- enum vect_relevant tmp_relevant = relevant;
- switch (tmp_relevant)
- {
- case vect_unused_in_scope:
- case vect_used_in_outer_by_reduction:
- case vect_used_in_outer:
- break;
+ VEC_free (gimple, heap, worklist);
+ return false;
+ }
- default:
+ live_p = false;
+ break;
+
+ case vect_nested_cycle:
+ if (tmp_relevant != vect_unused_in_scope
+ && tmp_relevant != vect_used_in_outer_by_reduction
+ && tmp_relevant != vect_used_in_outer)
+ {
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "unsupported use of nested cycle.");
VEC_free (gimple, heap, worklist);
return false;
- }
+ }
+
+ live_p = false;
+ break;
+
+ case vect_double_reduction_def:
+ if (tmp_relevant != vect_unused_in_scope
+ && tmp_relevant != vect_used_by_reduction)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "unsupported use of double reduction.");
+
+ VEC_free (gimple, heap, worklist);
+ return false;
+ }
+
+ live_p = false;
+ break;
- live_p = false;
+ default:
+ break;
}
FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
@@ -974,6 +991,7 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
/* Case 4: operand is defined by a loop header phi - reduction */
case vect_reduction_def:
+ case vect_double_reduction_def:
case vect_nested_cycle:
{
struct loop *loop;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 05f5e47..c7dab10 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -61,6 +61,7 @@ enum vect_def_type {
vect_internal_def,
vect_induction_def,
vect_reduction_def,
+ vect_double_reduction_def,
vect_nested_cycle,
vect_unknown_def_type
};
@@ -822,7 +823,7 @@ extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
/* In tree-vect-loop.c. */
/* FORNOW: Used in tree-parloops.c. */
extern void destroy_loop_vec_info (loop_vec_info, bool);
-extern gimple vect_is_simple_reduction (loop_vec_info, gimple, bool);
+extern gimple vect_is_simple_reduction (loop_vec_info, gimple, bool, bool *);
/* Drive for loop analysis stage. */
extern loop_vec_info vect_analyze_loop (struct loop *);
/* Drive for loop transformation stage. */