diff options
Diffstat (limited to 'gcc')
43 files changed, 2720 insertions, 314 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2b1d5ca..9470bc3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,96 @@ +2007-08-19  Dorit Nuzman  <dorit@il.ibm.com> + +	* tree-vectorizer.h (vect_is_simple_reduction): Takes a loop_vec_info +	as argument instead of struct loop. +	(nested_in_vect_loop_p): New function. +	(vect_relevant): Add enum values vect_used_in_outer_by_reduction and +	vect_used_in_outer. +	(is_loop_header_bb_p): New. Used to differentiate loop-header phis +	from other phis in the loop. +	(destroy_loop_vec_info): Add additional argument to declaration. + +	* tree-vectorizer.c (supportable_widening_operation): Also check if +	nested_in_vect_loop_p (don't allow changing the order in this case). +	(vect_is_simple_reduction): Takes a loop_vec_info as argument instead +	of struct loop. Call nested_in_vect_loop_p and don't require +	flag_unsafe_math_optimizations if it returns true. +	(new_stmt_vec_info): When setting def_type for phis differentiate  +	loop-header phis from other phis. +	(bb_in_loop_p): New function. +	(new_loop_vec_info): Inner-loop phis already have a stmt_vinfo, so just +	update their loop_vinfo.  Order of BB traversal now matters - call +	dfs_enumerate_from with bb_in_loop_p. +	(destroy_loop_vec_info): Takes additional argument to control whether +	stmt_vinfo of the loop stmts should be destroyed as well. +	(vect_is_simple_reduction): Allow the "non-reduction" use of a +	reduction stmt to be defined by a non loop-header phi. +	(vectorize_loops): Call destroy_loop_vec_info with additional argument. + +	* tree-vect-transform.c (vectorizable_reduction): Call +	nested_in_vect_loop_p. Check for multitypes in the inner-loop. +	(vectorizable_call): Likewise. +	(vectorizable_conversion): Likewise. +	(vectorizable_operation): Likewise. +	(vectorizable_type_promotion): Likewise. +	(vectorizable_type_demotion): Likewise. +	(vectorizable_store): Likewise. +	(vectorizable_live_operation): Likewise. 
+	(vectorizable_reduction): Likewise. Also pass loop_info to +	vect_is_simple_reduction instead of loop. +	(vect_init_vector): Call nested_in_vect_loop_p. +	(get_initial_def_for_reduction): Likewise. +	(vect_create_epilog_for_reduction): Likewise. +	(vect_init_vector): Check which loop to work with, in case there's an +	inner-loop. +	(get_initial_def_for_induction): Extend to handle outer-loop +	vectorization. Fix indentation. +	(vect_get_vec_def_for_operand): Support phis in the case vect_loop_def. +	In the case vect_induction_def get the vector def from the induction +	phi node, instead of calling get_initial_def_for_induction. +	(get_initial_def_for_reduction): Extend to handle outer-loop  +	vectorization. +	(vect_create_epilog_for_reduction): Extend to handle outer-loop +	vectorization. +	(vect_transform_loop): Change assert to just skip this case.  Add a +	dump printout. +	(vect_finish_stmt_generation): Add a couple asserts. + +	(vect_estimate_min_profitable_iters): Multiply +	cost of inner-loop stmts (in outer-loop vectorization) by estimated +	inner-loop bound. +	(vect_model_reduction_cost): Don't add reduction epilogue cost in case +	this is an inner-loop reduction in outer-loop vectorization. + +	* tree-vect-analyze.c (vect_analyze_scalar_cycles_1): New function. +	Same code as what used to be vect_analyze_scalar_cycles, only with +	additional argument loop, and loop_info passed to +	vect_is_simple_reduction instead of loop. +	(vect_analyze_scalar_cycles): Code factored out into +	vect_analyze_scalar_cycles_1. Call it for each relevant loop-nest. +	Updated documentation. +	(analyze_operations): Check for inner-loop loop-closed exit-phis during +	outer-loop vectorization that are live or not used in the outer-loop, +	because this requires special handling. +	(vect_enhance_data_refs_alignment): Don't consider versioning for +	nested-loops. +	(vect_analyze_data_refs): Check that there are no datarefs in the +	inner-loop. 
+	(vect_mark_stmts_to_be_vectorized): Also consider vect_used_in_outer +	and vect_used_in_outer_by_reduction cases. +	(process_use): Also consider the case of outer-loop stmt defining an +	inner-loop stmt and vice versa. +	(vect_analyze_loop_1): New function. +	(vect_analyze_loop_form): Extend, to allow a restricted form of nested +	loops.  Call vect_analyze_loop_1. +	(vect_analyze_loop): Skip (inner-)loops within outer-loops that have +	been vectorized.  Call destroy_loop_vec_info with additional argument. + +	* tree-vect-patterns.c (vect_recog_widen_sum_pattern): Don't allow +	in the inner-loop when doing outer-loop vectorization. Add +	documentation and printout. +	(vect_recog_dot_prod_pattern): Likewise. Also add check for +	GIMPLE_MODIFY_STMT (in case we encounter a phi in the loop). +  2007-08-18  Andrew Pinski  <pinskia@gmail.com>  	* tree-affine.h (print_aff): New prototype. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 61f0352..1a34a13 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,47 @@  2007-08-19  Dorit Nuzman  <dorit@il.ibm.com> +	* gcc.dg/vect/vect.exp: Compile tests with -fno-tree-scev-cprop +	and -fno-tree-reassoc. +	* gcc.dg/vect/no-tree-scev-cprop-vect-iv-1.c: Moved to... +	* gcc.dg/vect/no-scevccp-vect-iv-1.c: New test. +	* gcc.dg/vect/no-tree-scev-cprop-vect-iv-2.c: Moved to... +	* gcc.dg/vect/no-scevccp-vect-iv-2.c: New test. +	* gcc.dg/vect/no-tree-scev-cprop-vect-iv-3.c: Moved to... +	* gcc.dg/vect/no-scevccp-vect-iv-3.c: New test. +	* gcc.dg/vect/no-scevccp-noreassoc-outer-1.c: New test. +	* gcc.dg/vect/no-scevccp-noreassoc-outer-2.c: New test. +	* gcc.dg/vect/no-scevccp-noreassoc-outer-3.c: New test. +	* gcc.dg/vect/no-scevccp-noreassoc-outer-4.c: New test. +	* gcc.dg/vect/no-scevccp-noreassoc-outer-5.c: New test. +	* gcc.dg/vect/no-scevccp-outer-1.c: New test. +	* gcc.dg/vect/no-scevccp-outer-2.c: New test. +	* gcc.dg/vect/no-scevccp-outer-3.c: New test. 
+	* gcc.dg/vect/no-scevccp-outer-4.c: New test. +	* gcc.dg/vect/no-scevccp-outer-5.c: New test. +	* gcc.dg/vect/no-scevccp-outer-6.c: New test. +	* gcc.dg/vect/no-scevccp-outer-7.c: New test. +	* gcc.dg/vect/no-scevccp-outer-8.c: New test. +	* gcc.dg/vect/no-scevccp-outer-9.c: New test. +	* gcc.dg/vect/no-scevccp-outer-9a.c: New test. +	* gcc.dg/vect/no-scevccp-outer-9b.c: New test. +	* gcc.dg/vect/no-scevccp-outer-10.c: New test. +	* gcc.dg/vect/no-scevccp-outer-10a.c: New test. +	* gcc.dg/vect/no-scevccp-outer-10b.c: New test. +	* gcc.dg/vect/no-scevccp-outer-11.c: New test. +	* gcc.dg/vect/no-scevccp-outer-12.c: New test. +	* gcc.dg/vect/no-scevccp-outer-13.c: New test. +	* gcc.dg/vect/no-scevccp-outer-14.c: New test. +	* gcc.dg/vect/no-scevccp-outer-15.c: New test. +	* gcc.dg/vect/no-scevccp-outer-16.c: New test. +	* gcc.dg/vect/no-scevccp-outer-17.c: New test. +	* gcc.dg/vect/no-scevccp-outer-18.c: New test. +	* gcc.dg/vect/no-scevccp-outer-19.c: New test. +	* gcc.dg/vect/no-scevccp-outer-20.c: New test. +	* gcc.dg/vect/no-scevccp-outer-21.c: New test. +	* gcc.dg/vect/no-scevccp-outer-22.c: New test. + +2007-08-19  Dorit Nuzman  <dorit@il.ibm.com> +  	* testsuite/gcc.dg/vect/pr20122.c: Fix test (now vectorized, with  	versioning for aliasing).  	* testsuite/gcc.dg/vect/vect-35.c: Likewise. 
diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-outer-1.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-outer-1.c new file mode 100644 index 0000000..7c5b27a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-outer-1.c @@ -0,0 +1,50 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; + +int +foo (){ +  int i,j,k=0; +  int sum,x; + +  for (i = 0; i < N; i++) { +    sum = 0; +    for (j = 0; j < N; j++) { +      sum += (i + j); +      i++; +    } +    a[k++] = sum; +  } +} + +int main (void) +{ +  int i,j,k=0; +  int sum; + +  check_vect (); + +  foo (); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++){ +        sum += (j + i); +	i++; +      } +      if (a[k++] != sum) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-outer-2.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-outer-2.c new file mode 100644 index 0000000..109ec24 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-outer-2.c @@ -0,0 +1,49 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 +int a[200*N]; + +void +foo (){ +  int i,j; +  int sum,s=0; + +  for (i = 0; i < 200*N; i++) { +    sum = 0; +    for (j = 0; j < N; j++) { +      sum += (i + j); +      i++; +    } +    a[i] = sum; +  } +} + +int main (void) +{ +  int i,j,k=0; +  int sum,s=0; + +  check_vect (); + +  foo (); + +    /* check results:  */ +  for (i=0; i<200*N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++){ +        sum += (j + i); +	i++; +      } +      if (a[i] != sum) +	abort (); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-outer-3.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-outer-3.c new file mode 100644 index 0000000..831bace --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-outer-3.c @@ -0,0 +1,48 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; + +int +foo (){ +  int i,j; +  int sum,x; + +  for (i = 0; i < N; i++) { +    sum = 0; +    for (j = 0; j < N; j++) { +      sum += (i + j); +    } +    a[i] = sum; +  } +} + +int main (void) +{ +  int i,j; +  int sum; + +  check_vect (); + +  foo (); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++){ +        sum += (j + i); +      } +      if (a[i] != sum) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-outer-4.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-outer-4.c new file mode 100644 index 0000000..1a8cf63 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-outer-4.c @@ -0,0 +1,56 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int +foo (){ +  int i,j; +  int sum,s=0; + +  for (i = 0; i < 200*N; i++) { +    sum = 0; +    for (j = 0; j < N; j++) { +      sum += (i + j); +      i++; +    } +    s += sum; +  } +  return s; +} + +int bar (int i, int j) +{ +return (i + j); +} + +int main (void) +{ +  int i,j,k=0; +  int sum,s=0; +  int res;  + +  check_vect (); + +  res = foo (); + +    /* check results:  */ +  for (i=0; i<200*N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++){ +        sum += bar (i, j); +	i++; +      } +      s += sum; +    } +  if (res != s) +    
abort (); + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-outer-5.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-outer-5.c new file mode 100644 index 0000000..59c93b9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-outer-5.c @@ -0,0 +1,54 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; + +int +foo (){ +  int i,j; +  int sum,x; + +  for (i = 0; i < N; i++) { +    sum = 0; +    x = a[i]; +    for (j = 0; j < N; j++) { +      sum += (x + j); +    } +    a[i] = sum + i + x; +  } +} + +int main (void) +{ +  int i,j; +  int sum; +  int aa[N]; + +  check_vect (); + +  for (i=0; i<N; i++){ +    a[i] = i; +    aa[i] = i; +  } +  +  foo (); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++) +        sum += (j + aa[i]); +      if (a[i] != sum + i + aa[i]) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-1.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-1.c new file mode 100644 index 0000000..02c89c2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-1.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ + +#define N 40 +signed short image[N][N]; +signed short block[N][N]; + +/* memory references in the inner-loop */ + +unsigned int +foo (){ +  int i,j; +  unsigned int diff = 0; + +  for (i = 0; i < N; i++) { +    for (j = 0; j < N; j++) { +      diff += (image[i][j] - block[i][j]); +    } +  } +  return diff; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10.c new file mode 100644 index 0000000..a4ff856 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10.c @@ -0,0 +1,54 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; +int b[N]; + +int +foo (int n){ +  int i,j; +  int sum,x,y; + +  for (i = 0; i < N/2; i++) { +    sum = 0; +    x = b[2*i]; +    y = b[2*i+1]; +    for (j = 0; j < n; j++) { +      sum += j; +    } +    a[2*i] = sum + x; +    a[2*i+1] = sum + y; +  } +} + +int main (void) +{ +  int i,j; +  int sum; + +  check_vect (); + +  for (i=0; i<N; i++) +    b[i] = i; +  +  foo (N-1); + +    /* check results:  */ +  for (i=0; i<N/2; i++) +    { +      sum = 0; +      for (j = 0; j < N-1; j++) +        sum += j; +      if (a[2*i] != sum + b[2*i] || a[2*i+1] != sum + b[2*i+1]) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10a.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10a.c new file mode 100644 index 0000000..ff3333d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10a.c @@ -0,0 +1,58 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; +int b[N]; + +int +foo (int n){ +  int i,j; +  int sum,x,y; + +  if (n<=0) +    return 0; + +  for (i = 0; i < N/2; i++) { +    sum = 0; +    x = b[2*i]; +    y = b[2*i+1]; +    j = 0; +    do { +      sum += j; +    } while (++j < n); +    a[2*i] = sum + x; +    a[2*i+1] = sum + y; +  } +} + +int main (void) +{ +  int i,j; +  int sum; + +  check_vect (); + +  for (i=0; i<N; i++) +    b[i] = i; +  +  foo (N-1); + +    /* check results:  */ +  for (i=0; i<N/2; i++) +    { +      sum = 0; +      for (j = 0; j < N-1; j++) +        sum += j; +      if (a[2*i] != sum + b[2*i] || a[2*i+1] != sum + b[2*i+1]) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10b.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10b.c new file mode 100644 index 0000000..4aba6d8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-10b.c @@ -0,0 +1,57 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; +int b[N]; + +int +foo (int n){ +  int i,j; +  int sum,x,y; + +  if (n<=0) +    return 0; + +  for (i = 0; i < N/2; i++) { +    sum = 0; +    x = b[2*i]; +    y = b[2*i+1]; +    for (j = 0; j < n; j++) { +      sum += j; +    } +    a[2*i] = sum + x; +    a[2*i+1] = sum + y; +  } +} + +int main (void) +{ +  int i,j; +  int sum; + +  check_vect (); + +  for (i=0; i<N; i++) +    b[i] = i; +  +  foo (N-1); + +    /* check results:  */ +  for (i=0; i<N/2; i++) +    { +      sum = 0; +      for (j = 0; j < N-1; j++) +        sum += j; +      if (a[2*i] != sum + b[2*i] || a[2*i+1] != sum + b[2*i+1]) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-11.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-11.c new file mode 100644 index 0000000..ebc1735 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-11.c @@ -0,0 +1,50 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; + +int +foo (int n){ +  int i,j; +  int sum; + +  for (i = 0; i < n; i++) { +    sum = 0; +    for (j = 0; j < N; j++) { +      sum += j; +    } +    a[i] = sum; +  } +} + +int main (void) +{ +  int i,j; +  int sum; + +  check_vect (); + +  for (i=0; i<N; i++) +    a[i] = i; +  +  foo (N); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++) +        sum += j; +      if (a[i] != sum) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c new file mode 100644 index 0000000..85d4ec1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c @@ -0,0 +1,50 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 64 + +int a[N]; +short b[N]; + +int +foo (){ +  int i,j; +  int sum; + +  for (i = 0; i < N; i++) { +    sum = 0; +    for (j = 0; j < N; j++) { +      sum += j; +    } +    a[i] = sum; +    b[i] = (short)sum; +  } +} + +int main (void) +{ +  int i,j; +  int sum; + +  check_vect (); + +  foo (); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++) +        sum += j; +      if (a[i] != sum  || b[i] != (short)sum) +        abort(); +    } + +  return 0; +} + +/* Until we support multiple types in the inner loop  */ +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-13.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-13.c new file mode 100644 index 0000000..3e491a1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-13.c @@ -0,0 +1,67 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 16 + +unsigned short in[N]; + +unsigned int +foo (short scale){ +  int i; +  unsigned short j; +  unsigned int sum = 0; +  unsigned short sum_j; + +  for (i = 0; i < N; i++) { +    sum_j = 0; +    for (j = 0; j < N; j++) { +      sum_j += j; +    } +    sum += ((unsigned int) in[i] * (unsigned int) sum_j) >> scale; +  } +  return sum; +} + +unsigned short +bar (void) +{ +  unsigned short j; +  unsigned short sum_j; +    sum_j = 0; +    for (j = 0; j < N; j++) { +      sum_j += j; +    } +  return sum_j; +} + +int main (void) +{ +  int i; +  unsigned short j, sum_j; +  unsigned int sum = 0; +  unsigned int res; + +  check_vect (); + +  for (i=0; i<N; i++){ +    in[i] = i; +  } +  +  res = foo (2); + +  /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum_j = bar (); +      sum += ((unsigned int) in[i] * (unsigned int) sum_j) >> 2; +    } +  if (res != sum) +    abort (); + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" { target vect_widen_mult_hi_to_si } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-14.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-14.c new file mode 100644 index 0000000..ea11545 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-14.c @@ -0,0 +1,61 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 64 + +unsigned short +foo (short scale){ +  int i; +  unsigned short j; +  unsigned short sum = 0; +  unsigned short sum_j; + +  for (i = 0; i < N; i++) { +    sum_j = 0; +    for (j = 0; j < N; j++) { +      sum_j += j; +    } +    sum += sum_j; +  } +  return sum; +} + +unsigned short +bar (void) +{ +  unsigned short j; +  unsigned short sum_j; +    sum_j = 0; +    for (j = 0; j < N; j++) { +      sum_j += j; +    } +  return sum_j; +} + +int main (void) +{ +  int i; +  unsigned short j, sum_j; +  unsigned short sum = 0; +  unsigned short res; + +  check_vect (); + +  res = foo (2); + +  /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum_j = bar(); +      sum += sum_j; +    } +  if (res != sum) +    abort (); + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" { target vect_widen_mult_hi_to_si } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-15.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-15.c new file mode 100644 index 0000000..7eb5ff5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-15.c @@ -0,0 +1,48 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; + +int +foo (int x){ +  int i,j; +  int sum; + +  for (i = 0; i < N; i++) { +    sum = 0; +    for (j = 0; j < N; j++) { +      sum += j; +    } +    a[i] = sum + i + x; +  } +} + +int main (void) +{ +  int i,j; +  int sum; +  int aa[N]; + +  check_vect (); +  +  foo (3); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++) +        sum += j; +      if (a[i] != sum + i + 3) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-16.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-16.c new file mode 100644 index 0000000..7304504 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-16.c @@ -0,0 +1,62 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; + +int +foo (){ +  int i; +  unsigned short j; +  int sum = 0; +  unsigned short sum_j; + +  for (i = 0; i < N; i++) { +    sum += i; + +    sum_j = 0; +    for (j = 0; j < N; j++) { +      sum_j += j; +    } +    a[i] = sum_j + 5; +  } +  return sum; +} + +int main (void) +{ +  int i; +  unsigned short j, sum_j; +  int sum = 0; +  int res; + +  check_vect (); + +  for (i=0; i<N; i++) +    a[i] = i; +  +  res = foo (); + +  /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum += i; + +      sum_j = 0; +      for (j = 0; j < N; j++){ +        sum_j += j; +      } +      if (a[i] != sum_j + 5) +        abort(); +    } +  if (res != sum) +    abort (); + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-17.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-17.c new file mode 100644 index 0000000..3ddea5f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-17.c @@ -0,0 +1,68 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; +int b[N]; +int c[N]; + +int +foo (){ +  int i; +  unsigned short j; +  int sum = 0; +  unsigned short sum_j; + +  for (i = 0; i < N; i++) { +    int diff = b[i] - c[i]; + +    sum_j = 0; +    for (j = 0; j < N; j++) { +      sum_j += j; +    } +    a[i] = sum_j + 5; + +    sum += diff; +  } +  return sum; +} + +int main (void) +{ +  int i; +  unsigned short j, sum_j; +  int sum = 0; +  int res; + +  check_vect (); + +  for (i=0; i<N; i++){ +    b[i] = i; +    c[i] = 2*i; +  } +  +  res = foo (); + +  /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum += (b[i] - c[i]); + +      sum_j = 0; +      for (j = 0; j < N; j++){ +        sum_j += j; +      } +      if (a[i] != sum_j + 5) +        abort(); +    } +  if (res != sum) +    abort (); + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-18.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-18.c new file mode 100644 index 0000000..f069a98 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-18.c @@ -0,0 +1,51 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; + +int +foo (){ +  int i,j; +  int sum; + +  for (i = 0; i < N/2; i++) { +    sum = 0; +    for (j = 0; j < N; j++) { +      sum += j; +    } +    a[2*i] = sum; +    a[2*i+1] = 2*sum; +  } +} + +int main (void) +{ +  int i,j; +  int sum; + +  check_vect (); + +  for (i=0; i<N; i++) +    a[i] = i; +  +  foo (); + +    /* check results:  */ +  for (i=0; i<N/2; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++) +        sum += j; +      if (a[2*i] != sum || a[2*i+1] != 2*sum) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-19.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-19.c new file mode 100644 index 0000000..3a5f6cd --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-19.c @@ -0,0 +1,52 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 64 + +unsigned short a[N]; +unsigned int b[N]; + +int +foo (){ +  unsigned short i,j; +  unsigned short sum; + +  for (i = 0; i < N; i++) { +    sum = 0; +    for (j = 0; j < N; j++) { +      sum += j; +    } +    a[i] = sum; +    b[i] = (unsigned int)sum; +  } +} + +int main (void) +{ +  int i,j; +  short sum; + +  check_vect (); + +  for (i=0; i<N; i++) +    a[i] = i; +  +  foo (); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++) +        sum += j; +      if (a[i] != sum  || b[i] != (unsigned int)sum) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-2.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-2.c new file mode 100644 index 0000000..a9ac09c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +#define N 40 + +int +foo (){ +  int i,j; +  int diff = 0; + +  for (i = 0; i < N; i++) { +    for (j = 0; j < N; j++) { +      diff += j; +    } +  } +  return diff; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-20.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-20.c new file mode 100644 index 0000000..be2b85f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-20.c @@ -0,0 +1,54 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; +int b[N]; + +int +foo (){ +  int i,j; +  int sum,x,y; + +  for (i = 0; i < N/2; i++) { +    sum = 0; +    x = b[2*i]; +    y = b[2*i+1]; +    for (j = 0; j < N; j++) { +      sum += j; +    } +    a[2*i] = sum + x; +    a[2*i+1] = sum + y; +  } +} + +int main (void) +{ +  int i,j; +  int sum; + +  check_vect (); + +  for (i=0; i<N; i++) +    b[i] = i; +  +  foo (); + +    /* check results:  */ +  for (i=0; i<N/2; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++) +        sum += j; +      if (a[2*i] != sum + b[2*i] || a[2*i+1] != sum + b[2*i+1]) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-21.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-21.c new file mode 100644 index 0000000..30f76fa --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-21.c @@ -0,0 +1,62 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; + +int +foo (){ +  int i; +  unsigned short j; +  int sum = 0; +  unsigned short sum_j; + +  for (i = 0; i < N; i++) { +    sum += i; + +    sum_j = i; +    for (j = 0; j < N; j++) { +      sum_j += j; +    } +    a[i] = sum_j + 5; +  } +  return sum; +} + +int main (void) +{ +  int i; +  unsigned short j, sum_j; +  int sum = 0; +  int res; + +  check_vect (); + +  for (i=0; i<N; i++) +    a[i] = i; +  +  res = foo (); + +  /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum += i; + +      sum_j = i; +      for (j = 0; j < N; j++){ +        sum_j += j; +      } +      if (a[i] != sum_j + 5) +        abort(); +    } +  if (res != sum) +    abort (); + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-22.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-22.c new file mode 100644 index 0000000..659420f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-22.c @@ -0,0 +1,54 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; + +int +foo (int n){ +  int i,j; +  int sum; + +  if (n<=0) +    return 0; + +  /* inner-loop index j used after the inner-loop */ +  for (i = 0; i < N; i++) { +    sum = 0; +    for (j = 0; j < n; j+=2) { +      sum += j; +    } +    a[i] = sum + j; +  } +} + +int main (void) +{ +  int i,j; +  int sum; + +  check_vect (); + +  for (i=0; i<N; i++) +    a[i] = i; +  +  foo (N); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j+=2) +        sum += j; +      if (a[i] != sum + j) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-3.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-3.c new file mode 100644 index 0000000..ba15ecf --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-3.c @@ -0,0 +1,51 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; + +int +foo (){ +  int i,j; +  int sum; + +  /* inner-loop step > 1 */ +  for (i = 0; i < N; i++) { +    sum = 0; +    for (j = 0; j < N; j+=2) { +      sum += j; +    } +    a[i] = sum; +  } +} + +int main (void) +{ +  int i,j; +  int sum; + +  check_vect (); + +  for (i=0; i<N; i++) +    a[i] = i; +  +  foo (); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j+=2) +        sum += j; +      if (a[i] != sum) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-4.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-4.c new file mode 100644 index 0000000..c206587 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-4.c @@ -0,0 +1,55 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; + +/* induction variable k advances through inner and outer loops.  
*/ + +int +foo (int n){ +  int i,j,k=0; +  int sum; + +  if (n<=0) +    return 0; + +  for (i = 0; i < N; i++) { +    sum = 0; +    for (j = 0; j < n; j+=2) { +      sum += k++; +    } +    a[i] = sum + j; +  } +} + +int main (void) +{ +  int i,j,k=0; +  int sum; + +  check_vect (); + +  for (i=0; i<N; i++) +    a[i] = i; +  +  foo (N); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j+=2) +        sum += k++; +      if (a[i] != sum + j) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-5.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-5.c new file mode 100644 index 0000000..4b609ca --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-5.c @@ -0,0 +1,53 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; + +int +foo (){ +  int i,j; +  int sum; + +  for (i = 0; i < N; i++) { +    sum = 0; +    for (j = 0; j < N; j++) { +      sum += j; +    } +    a[i] += sum + i; +  } +} + +int main (void) +{ +  int i,j; +  int sum; +  int aa[N]; + +  check_vect (); + +  for (i=0; i<N; i++){ +    a[i] = i; +    aa[i] = i; +  } +  +  foo (); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++) +        sum += j; +      if (a[i] != aa[i] + sum + i) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-6.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-6.c new file mode 100644 index 0000000..d90857b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-6.c @@ -0,0 +1,56 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int +foo (int * __restrict__ b, int k){ +  int i,j; +  int sum,x; +  int a[N]; + +  for (i = 0; i < N; i++) { +    sum = b[i]; +    for (j = 0; j < N; j++) { +      sum += j; +    } +    a[i] = sum; +  } +   +  return a[k]; +} + +int main (void) +{ +  int i,j; +  int sum; +  int b[N]; +  int a[N]; + +  check_vect (); + +  for (i=0; i<N; i++) +    b[i] = i + 2; + +  for (i=0; i<N; i++) +    a[i] = foo (b,i); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = b[i]; +      for (j = 0; j < N; j++){ +        sum += j; +      } +      if (a[i] != sum) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" { xfail vect_no_align } } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-7.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-7.c new file mode 100644 index 0000000..ff0dc0d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-7.c @@ -0,0 +1,75 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 16 + +unsigned short in[N]; +unsigned short coef[N]; +unsigned short a[N]; + +unsigned int +foo (short scale){ +  int i; +  unsigned short j; +  unsigned int sum = 0; +  unsigned short sum_j; + +  for (i = 0; i < N; i++) { +    sum_j = 0; +    for (j = 0; j < N; j++) { +      sum_j += j; +    } +    a[i] = sum_j; +    sum += ((unsigned int) in[i] * (unsigned int) coef[i]) >> scale; +  } +  return sum; +} + +unsigned short +bar (void) +{ +  unsigned short j; +  unsigned short sum_j; + +  sum_j = 0; +  for (j = 0; j < N; j++) { +    sum_j += j; +  } + +  return sum_j; +} + +int main (void) +{ +  int i; +  unsigned short j, sum_j; +  unsigned int sum = 0; +  unsigned int res; + +  check_vect (); + +  for (i=0; i<N; i++){ +    in[i] = 2*i; +    coef[i] = i; +  } +  +  res = foo (2); + +  /* check results:  */ +  for (i=0; i<N; i++) +    { +      if (a[i] != bar ()) +	abort (); +      sum += ((unsigned int) in[i] * (unsigned int) coef[i]) >> 2; +    } +  if (res != sum) +    abort (); + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" { target vect_widen_mult_hi_to_si } } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-8.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-8.c new file mode 100644 index 0000000..44026dd --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-8.c @@ -0,0 +1,50 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + + +int +foo (int *a){ +  int i,j; +  int sum; + +  for (i = 0; i < N; i++) { +    sum = 0; +    for (j = 0; j < N; j++) { +      sum += j; +    } +    a[i] = sum; +  } +} + +int main (void) +{ +  int i,j; +  int sum; +  int a[N]; + +  check_vect (); + +  for (i=0; i<N; i++) +    a[i] = i; +  +  foo (a); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++) +        sum += j; +      if (a[i] != sum) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 
1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-9.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-9.c new file mode 100644 index 0000000..16b014c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-9.c @@ -0,0 +1,50 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; + +int +foo (int n){ +  int i,j; +  int sum; + +  for (i = 0; i < N; i++) { +    sum = 0; +    for (j = 0; j < n; j++) { +      sum += j; +    } +    a[i] = sum; +  } +} + +int main (void) +{ +  int i,j; +  int sum; + +  check_vect (); + +  for (i=0; i<N; i++) +    a[i] = i; +  +  foo (N); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++) +        sum += j; +      if (a[i] != sum) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-9a.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-9a.c new file mode 100644 index 0000000..93b0864 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-9a.c @@ -0,0 +1,54 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; + +int +foo (int n){ +  int i,j; +  int sum; + +  if (n<=0) +    return 0; + +  for (i = 0; i < N; i++) { +    sum = 0; +    j = 0; +    do { +      sum += j; +    }while (++j < n); +    a[i] = sum; +  } +} + +int main (void) +{ +  int i,j; +  int sum; + +  check_vect (); + +  for (i=0; i<N; i++) +    a[i] = i; +  +  foo (N); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++) +        sum += j; +      if (a[i] != sum) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times 
"OUTER LOOP VECTORIZED." 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-9b.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-9b.c new file mode 100644 index 0000000..5935599 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-9b.c @@ -0,0 +1,53 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 40 + +int a[N]; + +int +foo (int n){ +  int i,j; +  int sum; + +  if (n<=0) +    return 0; + +  for (i = 0; i < N; i++) { +    sum = 0; +    for (j = 0; j < n; j++) { +      sum += j; +    } +    a[i] = sum; +  } +} + +int main (void) +{ +  int i,j; +  int sum; + +  check_vect (); + +  for (i=0; i<N; i++) +    a[i] = i; +  +  foo (N); + +    /* check results:  */ +  for (i=0; i<N; i++) +    { +      sum = 0; +      for (j = 0; j < N; j++) +        sum += j; +      if (a[i] != sum) +        abort(); +    } + +  return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-tree-scev-cprop-vect-iv-1.c b/gcc/testsuite/gcc.dg/vect/no-tree-scev-cprop-vect-iv-1.c index 60c6ff5..e69de29 100644 --- a/gcc/testsuite/gcc.dg/vect/no-tree-scev-cprop-vect-iv-1.c +++ b/gcc/testsuite/gcc.dg/vect/no-tree-scev-cprop-vect-iv-1.c @@ -1,34 +0,0 @@ -/* { dg-require-effective-target vect_int } */ - -#include <stdarg.h> -#include "tree-vect.h" - -#define N 26 -  -int main1 (int X) -{   -  int s = X; -  int i; - -  /* vectorization of reduction with induction.  -     Need -fno-tree-scev-cprop or else the loop is eliminated.  
*/ -  for (i = 0; i < N; i++) -    s += i; - -  return s; -} - -int main (void) -{  -  int s; -  check_vect (); -   -  s = main1 (3); -  if (s != 328) -    abort (); - -  return 0; -}  - -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-tree-scev-cprop-vect-iv-2.c b/gcc/testsuite/gcc.dg/vect/no-tree-scev-cprop-vect-iv-2.c index 5e1c7b8..e69de29 100644 --- a/gcc/testsuite/gcc.dg/vect/no-tree-scev-cprop-vect-iv-2.c +++ b/gcc/testsuite/gcc.dg/vect/no-tree-scev-cprop-vect-iv-2.c @@ -1,49 +0,0 @@ -/* { dg-require-effective-target vect_int } */ - -#include <stdarg.h> -#include "tree-vect.h" - -#define N 16 -  -int main1 () -{   -  int arr1[N]; -  int k = 0; -  int m = 3, i = 0; -   -  /* Vectorization of induction that is used after the loop.   -     Currently vectorizable because scev_ccp disconnects the -     use-after-the-loop from the iv def inside the loop.  */ - -   do {  -        k = k + 2; -        arr1[i] = k; -	m = m + k; -	i++; -   } while (i < N); - -  /* check results:  */ -  for (i = 0; i < N; i++) -    {  -      if (arr1[i] != 2+2*i) -        abort (); -    } - -  return m + k; -} - -int main (void) -{  -  int res; - -  check_vect (); -   -  res = main1 (); -  if (res != 32 + 275) -    abort (); - -  return 0; -}  - -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ -/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-tree-scev-cprop-vect-iv-3.c b/gcc/testsuite/gcc.dg/vect/no-tree-scev-cprop-vect-iv-3.c index cdfaa6f..e69de29 100644 --- a/gcc/testsuite/gcc.dg/vect/no-tree-scev-cprop-vect-iv-3.c +++ b/gcc/testsuite/gcc.dg/vect/no-tree-scev-cprop-vect-iv-3.c @@ -1,27 +0,0 @@ -/* { dg-do compile } */ -/* { dg-require-effective-target vect_int } */ - -#include <stdarg.h> -#include "tree-vect.h" - -#define N 26 -  -unsigned int main1 () -{   -  unsigned short i; -  
unsigned int intsum = 0; - -  /* vectorization of reduction with induction, and widenning sum:  -     sum shorts into int.  -     Need -fno-tree-scev-cprop or else the loop is eliminated.  */ -  for (i = 0; i < N; i++) -    { -      intsum += i; -    }  - -  return intsum; -} - -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_sum_hi_to_si } } } */ -/* { dg-final { scan-tree-dump-times "vect_recog_widen_sum_pattern: detected" 1 "vect" { target vect_widen_sum_hi_to_si } } } */ -/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-sum.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-sum.c index 668b20c..d86dafe 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-sum.c +++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-sum.c @@ -42,4 +42,5 @@ int main (void)  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */  /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect.exp b/gcc/testsuite/gcc.dg/vect/vect.exp index 2208f72..fba5aa8 100644 --- a/gcc/testsuite/gcc.dg/vect/vect.exp +++ b/gcc/testsuite/gcc.dg/vect/vect.exp @@ -182,8 +182,20 @@ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-trapping-math-*.\[cS\]]]  # -fno-tree-scev-cprop  set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS  lappend DEFAULT_VECTCFLAGS "-fno-tree-scev-cprop" -dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-tree-scev-cprop-*.\[cS\]]]  \ -	"" $DEFAULT_VECTCFLAGS +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-scevccp-vect-*.\[cS\]]]  \ +        "" $DEFAULT_VECTCFLAGS + +# -fno-tree-scev-cprop +set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS +lappend DEFAULT_VECTCFLAGS "-fno-tree-scev-cprop" +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-scevccp-outer-*.\[cS\]]]  \ +        "" $DEFAULT_VECTCFLAGS 
+ +# -fno-tree-scev-cprop -fno-tree-reassoc +set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS +lappend DEFAULT_VECTCFLAGS "-fno-tree-scev-cprop" "-fno-tree-reassoc" +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-scevccp-noreassoc-*.\[cS\]]]  \ +        "" $DEFAULT_VECTCFLAGS  # -fno-tree-dominator-opts  set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c index cc43ad6..5fb5462 100644 --- a/gcc/tree-vect-analyze.c +++ b/gcc/tree-vect-analyze.c @@ -325,6 +325,24 @@ vect_analyze_operations (loop_vec_info loop_vinfo)  	      print_generic_expr (vect_dump, phi, TDF_SLIM);  	    } +	  if (! is_loop_header_bb_p (bb)) +	    { +	      /* inner-loop loop-closed exit phi in outer-loop vectorization +		 (i.e. a phi in the tail of the outer-loop).  +		 FORNOW: we currently don't support the case that these phis +		 are not used in the outerloop, cause this case requires +		 to actually do something here.  */ +	      if (!STMT_VINFO_RELEVANT_P (stmt_info)  +		  || STMT_VINFO_LIVE_P (stmt_info)) +		{ +		  if (vect_print_dump_info (REPORT_DETAILS)) +		    fprintf (vect_dump,  +			     "Unsupported loop-closed phi in outer-loop."); +		  return false; +		} +	      continue; +	    } +  	  gcc_assert (stmt_info);  	  if (STMT_VINFO_LIVE_P (stmt_info)) @@ -398,7 +416,9 @@ vect_analyze_operations (loop_vec_info loop_vinfo)  	      break;  	    case vect_reduction_def: -	      gcc_assert (relevance == vect_unused_in_loop); +	      gcc_assert (relevance == vect_used_in_outer +			  || relevance == vect_used_in_outer_by_reduction +			  || relevance == vect_unused_in_loop);  	      break;	  	    case vect_induction_def: @@ -589,50 +609,17 @@ exist_non_indexing_operands_for_use_p (tree use, tree stmt)  } -/* Function vect_analyze_scalar_cycles. 
- -   Examine the cross iteration def-use cycles of scalar variables, by -   analyzing the loop (scalar) PHIs; Classify each cycle as one of the -   following: invariant, induction, reduction, unknown. -    -   Some forms of scalar cycles are not yet supported. - -   Example1: reduction: (unsupported yet) - -              loop1: -              for (i=0; i<N; i++) -                 sum += a[i]; - -   Example2: induction: (unsupported yet) - -              loop2: -              for (i=0; i<N; i++) -                 a[i] = i; - -   Note: the following loop *is* vectorizable: - -              loop3: -              for (i=0; i<N; i++) -                 a[i] = b[i]; - -         even though it has a def-use cycle caused by the induction variable i: - -              loop: i_2 = PHI (i_0, i_1) -                    a[i_2] = ...; -                    i_1 = i_2 + 1; -                    GOTO loop; +/* Function vect_analyze_scalar_cycles_1. -         because the def-use cycle in loop3 is considered "not relevant" - i.e., -         it does not need to be vectorized because it is only used for array -         indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in -         loop2 on the other hand is relevant (it is being written to memory). -*/ +   Examine the cross iteration def-use cycles of scalar variables +   in LOOP. LOOP_VINFO represents the loop that is now being +   considered for vectorization (can be LOOP, or an outer-loop +   enclosing LOOP).  
*/  static void -vect_analyze_scalar_cycles (loop_vec_info loop_vinfo) +vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)  {    tree phi; -  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);    basic_block bb = loop->header;    tree dumy;    VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64); @@ -698,7 +685,7 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)        gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));        gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type); -      reduc_stmt = vect_is_simple_reduction (loop, phi); +      reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);        if (reduc_stmt)          {            if (vect_print_dump_info (REPORT_DETAILS)) @@ -717,6 +704,48 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)  } +/* Function vect_analyze_scalar_cycles. + +   Examine the cross iteration def-use cycles of scalar variables, by +   analyzing the loop-header PHIs of scalar variables; Classify each  +   cycle as one of the following: invariant, induction, reduction, unknown. +   We do that for the loop represented by LOOP_VINFO, and also to its +   inner-loop, if exists. +   Examples for scalar cycles: + +   Example1: reduction: + +              loop1: +              for (i=0; i<N; i++) +                 sum += a[i]; + +   Example2: induction: + +              loop2: +              for (i=0; i<N; i++) +                 a[i] = i;  */ + +static void +vect_analyze_scalar_cycles (loop_vec_info loop_vinfo) +{ +  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + +  vect_analyze_scalar_cycles_1 (loop_vinfo, loop); + +  /* When vectorizing an outer-loop, the inner-loop is executed sequentially. +     Reductions in such inner-loop therefore have different properties than +     the reductions in the nest that gets vectorized: +     1. 
When vectorized, they are executed in the same order as in the original +        scalar loop, so we can't change the order of computation when +        vectorizing them. +     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the  +        current checks are too strict.  */ + +  if (loop->inner) +    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner); +} + +  /* Function vect_insert_into_interleaving_chain.     Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */ @@ -1166,6 +1195,8 @@ vect_is_duplicate_ddr (VEC (ddr_p, heap) * may_alias_ddrs, ddr_p ddr_new)  static bool  vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)  { +  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); +    if (vect_print_dump_info (REPORT_DR_DETAILS))      {        fprintf (vect_dump, "mark for run-time aliasing test between "); @@ -1174,6 +1205,14 @@ vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)        print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);      } +  /* FORNOW: We don't support versioning with outer-loop vectorization.  */ +  if (loop->inner) +    { +      if (vect_print_dump_info (REPORT_DR_DETAILS)) +	fprintf (vect_dump, "versioning not yet supported for outer-loops."); +      return false; +    } +    /* Do not add to the list duplicate ddrs.  */    if (vect_is_duplicate_ddr (LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr))      return true; @@ -1805,7 +1844,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)       4) all misaligned data refs with a known misalignment are supported, and       5) the number of runtime alignment checks is within reason.  
*/ -  do_versioning = flag_tree_vect_loop_version && (!optimize_size); +  do_versioning =  +	flag_tree_vect_loop_version  +	&& (!optimize_size) +	&& (!loop->inner); /* FORNOW */    if (do_versioning)      { @@ -2188,6 +2230,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo)      {        tree stmt;        stmt_vec_info stmt_info; +      basic_block bb;        if (!dr || !DR_REF (dr))          { @@ -2200,6 +2243,16 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo)        stmt = DR_STMT (dr);        stmt_info = vinfo_for_stmt (stmt); +      /* If outer-loop vectorization: we don't yet support datarefs +	 in the innermost loop.  */ +      bb = bb_for_stmt (stmt); +      if (bb->loop_father != LOOP_VINFO_LOOP (loop_vinfo)) +	{ +	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) +	    fprintf (vect_dump, "not vectorized: data-ref in nested loop"); +	  return false; +	} +        if (STMT_VINFO_DATA_REF (stmt_info))          {            if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) @@ -2287,11 +2340,13 @@ vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,        /* This is the last stmt in a sequence that was detected as a            pattern that can potentially be vectorized.  Don't mark the stmt -         as relevant/live because it's not going to vectorized. +         as relevant/live because it's not going to be vectorized.           Instead mark the pattern-stmt that replaces it.  */ + +      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); +        if (vect_print_dump_info (REPORT_DETAILS))          fprintf (vect_dump, "last stmt in pattern. 
don't mark relevant/live."); -      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);        stmt_info = vinfo_for_stmt (pattern_stmt);        gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);        save_relevant = STMT_VINFO_RELEVANT (stmt_info); @@ -2341,7 +2396,8 @@ vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,    *live_p = false;    /* cond stmt other than loop exit cond.  */ -  if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo))) +  if (is_ctrl_stmt (stmt)  +      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)       *relevant = vect_used_in_loop;    /* changing memory.  */ @@ -2398,6 +2454,8 @@ vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,     of the respective DEF_STMT is left unchanged.     - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we      skip DEF_STMT cause it had already been processed.   +   - case 3: If DEF_STMT and STMT are in different nests, then  "relevant" will +   be modified accordingly.     Return true if everything is as expected. Return false otherwise.  */ @@ -2408,7 +2466,7 @@ process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,    struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);    stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);    stmt_vec_info dstmt_vinfo; -  basic_block def_bb; +  basic_block bb, def_bb;    tree def, def_stmt;    enum vect_def_type dt; @@ -2429,17 +2487,27 @@ process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,    def_bb = bb_for_stmt (def_stmt);    if (!flow_bb_inside_loop_p (loop, def_bb)) -    return true; +    { +      if (vect_print_dump_info (REPORT_DETAILS)) +	fprintf (vect_dump, "def_stmt is out of loop."); +      return true; +    } -  /* case 2: A reduction phi defining a reduction stmt (DEF_STMT). DEF_STMT  -     must have already been processed, so we just check that everything is as  -     expected, and we are done.  
*/ +  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).  +     DEF_STMT must have already been processed, because this should be the  +     only way that STMT, which is a reduction-phi, was put in the worklist,  +     as there should be no other uses for DEF_STMT in the loop.  So we just  +     check that everything is as expected, and we are done.  */    dstmt_vinfo = vinfo_for_stmt (def_stmt); +  bb = bb_for_stmt (stmt);    if (TREE_CODE (stmt) == PHI_NODE        && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def        && TREE_CODE (def_stmt) != PHI_NODE -      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def) +      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def +      && bb->loop_father == def_bb->loop_father)      { +      if (vect_print_dump_info (REPORT_DETAILS)) +	fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");        if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))  	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));        gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction); @@ -2448,6 +2516,73 @@ process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,        return true;      } +  /* case 3a: outer-loop stmt defining an inner-loop stmt: +	outer-loop-header-bb: +		d = def_stmt +	inner-loop: +		stmt # use (d) +	outer-loop-tail-bb: +		...		  */ +  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father)) +    { +      if (vect_print_dump_info (REPORT_DETAILS)) +	fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt."); +      switch (relevant) +	{ +	case vect_unused_in_loop: +	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ? 
+			vect_used_by_reduction : vect_unused_in_loop; +	  break; +	case vect_used_in_outer_by_reduction: +	  relevant = vect_used_by_reduction; +	  break; +	case vect_used_in_outer: +	  relevant = vect_used_in_loop; +	  break; +	case vect_used_by_reduction:  +	case vect_used_in_loop: +	  break; + +	default: +	  gcc_unreachable (); +	}    +    } + +  /* case 3b: inner-loop stmt defining an outer-loop stmt: +	outer-loop-header-bb: +		... +	inner-loop: +		d = def_stmt +	outer-loop-tail-bb: +		stmt # use (d)		*/ +  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father)) +    { +      if (vect_print_dump_info (REPORT_DETAILS)) +	fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt."); +      switch (relevant) +        { +        case vect_unused_in_loop: +          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ? +                        vect_used_in_outer_by_reduction : vect_unused_in_loop; +          break; + +        case vect_used_in_outer_by_reduction: +        case vect_used_in_outer: +          break; + +        case vect_used_by_reduction: +          relevant = vect_used_in_outer_by_reduction; +          break; + +        case vect_used_in_loop: +          relevant = vect_used_in_outer; +          break; + +        default: +          gcc_unreachable (); +        } +    } +    vect_mark_relevant (worklist, def_stmt, relevant, live_p);    return true;  } @@ -2556,25 +2691,38 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)  	 identify stmts that are used solely by a reduction, and therefore the   	 order of the results that they produce does not have to be kept. -         Reduction phis are expected to be used by a reduction stmt;  Other  -	 reduction stmts are expected to be unused in the loop.  
These are the  -	 expected values of "relevant" for reduction phis/stmts in the loop: +	 Reduction phis are expected to be used by a reduction stmt, or by +	 in an outer loop;  Other reduction stmts are expected to be +	 in the loop, and possibly used by a stmt in an outer loop.  +	 Here are the expected values of "relevant" for reduction phis/stmts:  	 relevance:				phi	stmt  	 vect_unused_in_loop				ok +	 vect_used_in_outer_by_reduction	ok	ok +	 vect_used_in_outer			ok	ok  	 vect_used_by_reduction			ok  	 vect_used_in_loop 						  */        if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)          { -	  switch (relevant) +	  enum vect_relevant tmp_relevant = relevant; +	  switch (tmp_relevant)  	    {  	    case vect_unused_in_loop:  	      gcc_assert (TREE_CODE (stmt) != PHI_NODE); +	      relevant = vect_used_by_reduction;  	      break; + +	    case vect_used_in_outer_by_reduction: +	    case vect_used_in_outer: +	      gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR +			  && TREE_CODE (stmt) != DOT_PROD_EXPR); +	      break; +  	    case vect_used_by_reduction:  	      if (TREE_CODE (stmt) == PHI_NODE)  		break; +	      /* fall through */  	    case vect_used_in_loop:  	    default:  	      if (vect_print_dump_info (REPORT_DETAILS)) @@ -2582,7 +2730,6 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)  	      VEC_free (tree, heap, worklist);  	      return false;  	    } -	  relevant = vect_used_by_reduction;  	  live_p = false;	  	} @@ -2724,11 +2871,39 @@ vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)  } +/* Function vect_analyze_loop_1. + +   Apply a set of analyses on LOOP, and create a loop_vec_info struct +   for it. The different analyses will record information in the +   loop_vec_info struct.  This is a subset of the analyses applied in +   vect_analyze_loop, to be applied on an inner-loop nested in the loop +   that is now considered for (outer-loop) vectorization.  
*/ + +static loop_vec_info +vect_analyze_loop_1 (struct loop *loop) +{ +  loop_vec_info loop_vinfo; + +  if (vect_print_dump_info (REPORT_DETAILS)) +    fprintf (vect_dump, "===== analyze_loop_nest_1 ====="); + +  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */ + +  loop_vinfo = vect_analyze_loop_form (loop); +  if (!loop_vinfo) +    { +      if (vect_print_dump_info (REPORT_DETAILS)) +        fprintf (vect_dump, "bad inner-loop form."); +      return NULL; +    } + +  return loop_vinfo; +} + +  /* Function vect_analyze_loop_form. -   Verify the following restrictions (some may be relaxed in the future): -   - it's an inner-most loop -   - number of BBs = 2 (which are the loop header and the latch) +   Verify that certain CFG restrictions hold, including:     - the loop has a pre-header     - the loop has a single entry and exit     - the loop exit condition is simple enough, and the number of iterations @@ -2740,31 +2915,134 @@ vect_analyze_loop_form (struct loop *loop)    loop_vec_info loop_vinfo;    tree loop_cond;    tree number_of_iterations = NULL; +  loop_vec_info inner_loop_vinfo = NULL;    if (vect_print_dump_info (REPORT_DETAILS))      fprintf (vect_dump, "=== vect_analyze_loop_form ==="); -  if (loop->inner) +  /* Different restrictions apply when we are considering an inner-most loop, +     vs. an outer (nested) loop.   +     (FORNOW. May want to relax some of these restrictions in the future).  */ + +  if (!loop->inner)      { -      if (vect_print_dump_info (REPORT_OUTER_LOOPS)) -        fprintf (vect_dump, "not vectorized: nested loop."); +      /* Inner-most loop.  We currently require that the number of BBs is  +	 exactly 2 (the header and latch).  
Vectorizable inner-most loops  +	 look like this: + +                        (pre-header) +                           | +                          header <--------+ +                           | |            | +                           | +--> latch --+ +                           | +                        (exit-bb)  */ + +      if (loop->num_nodes != 2) +        { +          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) +            fprintf (vect_dump, "not vectorized: too many BBs in loop."); +          return NULL; +        } + +      if (empty_block_p (loop->header)) +    { +          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) +            fprintf (vect_dump, "not vectorized: empty loop.");        return NULL;      } +    } +  else +    { +      struct loop *innerloop = loop->inner; +      edge backedge, entryedge; + +      /* Nested loop. We currently require that the loop is doubly-nested, +	 contains a single inner loop, and the number of BBs is exactly 5.  +	 Vectorizable outer-loops look like this: + +			(pre-header) +			   | +			  header <---+ +			   |         | +		          inner-loop | +			   |         | +			  tail ------+ +			   |  +		        (exit-bb) + +	 The inner-loop has the properties expected of inner-most loops +	 as described above.  */ + +      if ((loop->inner)->inner || (loop->inner)->next) +	{ +	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) +	    fprintf (vect_dump, "not vectorized: multiple nested loops."); +	  return NULL; +	} + +      /* Analyze the inner-loop.  
*/ +      inner_loop_vinfo = vect_analyze_loop_1 (loop->inner); +      if (!inner_loop_vinfo) +	{ +	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) +            fprintf (vect_dump, "not vectorized: Bad inner loop."); +	  return NULL; +	} + +      if (!expr_invariant_in_loop_p (loop, +					LOOP_VINFO_NITERS (inner_loop_vinfo))) +	{ +	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) +	    fprintf (vect_dump, +		     "not vectorized: inner-loop count not invariant."); +	  destroy_loop_vec_info (inner_loop_vinfo, true); +	  return NULL; +	} + +      if (loop->num_nodes != 5)  +        { +	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) +	    fprintf (vect_dump, "not vectorized: too many BBs in loop."); +	  destroy_loop_vec_info (inner_loop_vinfo, true); +	  return NULL; +        } + +      gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2); +      backedge = EDGE_PRED (innerloop->header, 1);	   +      entryedge = EDGE_PRED (innerloop->header, 0); +      if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch) +	{ +	  backedge = EDGE_PRED (innerloop->header, 0); +	  entryedge = EDGE_PRED (innerloop->header, 1);	 +	} +	 +      if (entryedge->src != loop->header +	  || !single_exit (innerloop) +	  || single_exit (innerloop)->dest !=  EDGE_PRED (loop->latch, 0)->src) +	{ +	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) +	    fprintf (vect_dump, "not vectorized: unsupported outerloop form."); +	  destroy_loop_vec_info (inner_loop_vinfo, true); +	  return NULL; +	} + +      if (vect_print_dump_info (REPORT_DETAILS)) +        fprintf (vect_dump, "Considering outer-loop vectorization."); +    }    if (!single_exit (loop)  -      || loop->num_nodes != 2        || EDGE_COUNT (loop->header->preds) != 2)      {        if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))          {            if (!single_exit (loop))              fprintf (vect_dump, "not vectorized: multiple exits."); -          else if (loop->num_nodes != 2) -            fprintf (vect_dump, 
"not vectorized: too many BBs in loop.");            else if (EDGE_COUNT (loop->header->preds) != 2)              fprintf (vect_dump, "not vectorized: too many incoming edges.");          } - +      if (inner_loop_vinfo) +	destroy_loop_vec_info (inner_loop_vinfo, true);        return NULL;      } @@ -2777,6 +3055,8 @@ vect_analyze_loop_form (struct loop *loop)      {        if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))          fprintf (vect_dump, "not vectorized: unexpected loop form."); +      if (inner_loop_vinfo) +	destroy_loop_vec_info (inner_loop_vinfo, true);        return NULL;      } @@ -2794,22 +3074,19 @@ vect_analyze_loop_form (struct loop *loop)  	{  	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))  	    fprintf (vect_dump, "not vectorized: abnormal loop exit edge."); +	  if (inner_loop_vinfo) +	    destroy_loop_vec_info (inner_loop_vinfo, true);  	  return NULL;  	}      } -  if (empty_block_p (loop->header)) -    { -      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS)) -        fprintf (vect_dump, "not vectorized: empty loop."); -      return NULL; -    } -    loop_cond = vect_get_loop_niters (loop, &number_of_iterations);    if (!loop_cond)      {        if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))  	fprintf (vect_dump, "not vectorized: complicated exit condition."); +      if (inner_loop_vinfo) +	destroy_loop_vec_info (inner_loop_vinfo, true);        return NULL;      } @@ -2818,6 +3095,8 @@ vect_analyze_loop_form (struct loop *loop)        if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))  	fprintf (vect_dump,   		 "not vectorized: number of iterations cannot be computed."); +      if (inner_loop_vinfo) +	destroy_loop_vec_info (inner_loop_vinfo, true);        return NULL;      } @@ -2825,7 +3104,9 @@ vect_analyze_loop_form (struct loop *loop)      {        if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))          fprintf (vect_dump, "Infinite number of iterations."); -      return false; +      if (inner_loop_vinfo) +	
destroy_loop_vec_info (inner_loop_vinfo, true); +      return NULL;      }    if (!NITERS_KNOWN_P (number_of_iterations)) @@ -2840,12 +3121,19 @@ vect_analyze_loop_form (struct loop *loop)      {        if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))          fprintf (vect_dump, "not vectorized: number of iterations = 0."); +      if (inner_loop_vinfo) +        destroy_loop_vec_info (inner_loop_vinfo, false);        return NULL;      }    loop_vinfo = new_loop_vec_info (loop);    LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; -  LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond; + +  STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type; + +  /* CHECKME: May want to keep it around it in the future.  */ +  if (inner_loop_vinfo) +    destroy_loop_vec_info (inner_loop_vinfo, false);    gcc_assert (!loop->aux);    loop->aux = loop_vinfo; @@ -2867,6 +3155,15 @@ vect_analyze_loop (struct loop *loop)    if (vect_print_dump_info (REPORT_DETAILS))      fprintf (vect_dump, "===== analyze_loop_nest ====="); +  if (loop_outer (loop)  +      && loop_vec_info_for_loop (loop_outer (loop)) +      && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop)))) +    { +      if (vect_print_dump_info (REPORT_DETAILS)) +	fprintf (vect_dump, "outer-loop already vectorized."); +      return NULL; +    } +    /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  
*/    loop_vinfo = vect_analyze_loop_form (loop); @@ -2888,7 +3185,7 @@ vect_analyze_loop (struct loop *loop)      {        if (vect_print_dump_info (REPORT_DETAILS))  	fprintf (vect_dump, "bad data references."); -      destroy_loop_vec_info (loop_vinfo); +      destroy_loop_vec_info (loop_vinfo, true);        return NULL;      } @@ -2906,7 +3203,7 @@ vect_analyze_loop (struct loop *loop)      {        if (vect_print_dump_info (REPORT_DETAILS))  	fprintf (vect_dump, "unexpected pattern."); -      destroy_loop_vec_info (loop_vinfo); +      destroy_loop_vec_info (loop_vinfo, true);        return NULL;      } @@ -2918,7 +3215,7 @@ vect_analyze_loop (struct loop *loop)      {        if (vect_print_dump_info (REPORT_DETAILS))  	fprintf (vect_dump, "bad data alignment."); -      destroy_loop_vec_info (loop_vinfo); +      destroy_loop_vec_info (loop_vinfo, true);        return NULL;      } @@ -2927,7 +3224,7 @@ vect_analyze_loop (struct loop *loop)      {        if (vect_print_dump_info (REPORT_DETAILS))          fprintf (vect_dump, "can't determine vectorization factor."); -      destroy_loop_vec_info (loop_vinfo); +      destroy_loop_vec_info (loop_vinfo, true);        return NULL;      } @@ -2939,7 +3236,7 @@ vect_analyze_loop (struct loop *loop)      {        if (vect_print_dump_info (REPORT_DETAILS))  	fprintf (vect_dump, "bad data dependence."); -      destroy_loop_vec_info (loop_vinfo); +      destroy_loop_vec_info (loop_vinfo, true);        return NULL;      } @@ -2951,7 +3248,7 @@ vect_analyze_loop (struct loop *loop)      {        if (vect_print_dump_info (REPORT_DETAILS))  	fprintf (vect_dump, "bad data access."); -      destroy_loop_vec_info (loop_vinfo); +      destroy_loop_vec_info (loop_vinfo, true);        return NULL;      } @@ -2963,7 +3260,7 @@ vect_analyze_loop (struct loop *loop)      {        if (vect_print_dump_info (REPORT_DETAILS))  	fprintf (vect_dump, "bad data alignment."); -      destroy_loop_vec_info (loop_vinfo); +      
destroy_loop_vec_info (loop_vinfo, true);        return NULL;      } @@ -2975,7 +3272,7 @@ vect_analyze_loop (struct loop *loop)      {        if (vect_print_dump_info (REPORT_DETAILS))  	fprintf (vect_dump, "bad operation or unsupported loop bound."); -      destroy_loop_vec_info (loop_vinfo); +      destroy_loop_vec_info (loop_vinfo, true);        return NULL;      } diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index 054bfcb..cfae6e0 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -148,7 +148,14 @@ widened_name_p (tree name, tree use_stmt, tree *half_type, tree *def_stmt)     * Return value: A new stmt that will be used to replace the sequence of     stmts that constitute the pattern. In this case it will be:          WIDEN_DOT_PRODUCT <x_t, y_t, sum_0> -*/ + +   Note: The dot-prod idiom is a widening reduction pattern that is +         vectorized without preserving all the intermediate results. It +         produces only N/2 (widened) results (by summing up pairs of +         intermediate results) rather than all N results.  Therefore, we +         cannot allow this pattern when we want to get all the results and in +         the correct order (as is the case when this computation is in an +         inner-loop nested in an outer-loop that us being vectorized).  */  static tree  vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out) @@ -160,6 +167,8 @@ vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out)    tree type, half_type;    tree pattern_expr;    tree prod_type; +  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); +  struct loop *loop = LOOP_VINFO_LOOP (loop_info);    if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT)      return NULL; @@ -242,6 +251,10 @@ vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out)    gcc_assert (stmt_vinfo);    if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_loop_def)      return NULL; +  /* FORNOW. 
Can continue analyzing the def-use chain when this stmt in a phi  +     inside the loop (in case we are analyzing an outer-loop).  */ +  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT) +    return NULL;     expr = GIMPLE_STMT_OPERAND (stmt, 1);    if (TREE_CODE (expr) != MULT_EXPR)      return NULL; @@ -295,6 +308,16 @@ vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out)        fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");        print_generic_expr (vect_dump, pattern_expr, TDF_SLIM);      } + +  /* We don't allow changing the order of the computation in the inner-loop +     when doing outer-loop vectorization.  */ +  if (nested_in_vect_loop_p (loop, last_stmt)) +    { +      if (vect_print_dump_info (REPORT_DETAILS)) +        fprintf (vect_dump, "vect_recog_dot_prod_pattern: not allowed."); +      return NULL; +    } +    return pattern_expr;  } @@ -521,7 +544,14 @@ vect_recog_pow_pattern (tree last_stmt, tree *type_in, tree *type_out)     * Return value: A new stmt that will be used to replace the sequence of     stmts that constitute the pattern. In this case it will be:          WIDEN_SUM <x_t, sum_0> -*/ + +   Note: The widneing-sum idiom is a widening reduction pattern that is  +	 vectorized without preserving all the intermediate results. It +         produces only N/2 (widened) results (by summing up pairs of  +	 intermediate results) rather than all N results.  Therefore, we  +	 cannot allow this pattern when we want to get all the results and in  +	 the correct order (as is the case when this computation is in an  +	 inner-loop nested in an outer-loop that us being vectorized).  
*/  static tree  vect_recog_widen_sum_pattern (tree last_stmt, tree *type_in, tree *type_out) @@ -531,6 +561,8 @@ vect_recog_widen_sum_pattern (tree last_stmt, tree *type_in, tree *type_out)    stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);    tree type, half_type;    tree pattern_expr; +  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); +  struct loop *loop = LOOP_VINFO_LOOP (loop_info);    if (TREE_CODE (last_stmt) != GIMPLE_MODIFY_STMT)      return NULL; @@ -580,6 +612,16 @@ vect_recog_widen_sum_pattern (tree last_stmt, tree *type_in, tree *type_out)        fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");        print_generic_expr (vect_dump, pattern_expr, TDF_SLIM);      } + +  /* We don't allow changing the order of the computation in the inner-loop +     when doing outer-loop vectorization.  */ +  if (nested_in_vect_loop_p (loop, last_stmt)) +    { +      if (vect_print_dump_info (REPORT_DETAILS)) +        fprintf (vect_dump, "vect_recog_widen_sum_pattern: not allowed."); +      return NULL; +    } +    return pattern_expr;  } diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index 16beffc..6e88fa9 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -124,6 +124,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)    basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);    int nbbs = loop->num_nodes;    int byte_misalign; +  int innerloop_iters, factor;    /* Cost model disabled.  */    if (!flag_vect_cost_model) @@ -152,11 +153,20 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)       TODO: Consider assigning different costs to different scalar       statements.  */ +  /* FORNOW.  
*/ +  if (loop->inner) +    innerloop_iters = 50; /* FIXME */ +    for (i = 0; i < nbbs; i++)      {        block_stmt_iterator si;        basic_block bb = bbs[i]; +      if (bb->loop_father == loop->inner) + 	factor = innerloop_iters; +      else + 	factor = 1; +        for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))          {            tree stmt = bsi_stmt (si); @@ -164,8 +174,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)            if (!STMT_VINFO_RELEVANT_P (stmt_info)                && !STMT_VINFO_LIVE_P (stmt_info))              continue; -          scalar_single_iter_cost += cost_for_stmt (stmt); -          vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info); +          scalar_single_iter_cost += cost_for_stmt (stmt) * factor; +          vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor; +	  /* FIXME: for stmts in the inner-loop in outer-loop vectorization, +	     some of the "outside" costs are generated inside the outer-loop.  */            vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);          }      } @@ -1071,6 +1083,9 @@ vect_init_vector (tree stmt, tree vector_var, tree vector_type)    tree new_temp;    basic_block new_bb; +  if (nested_in_vect_loop_p (loop, stmt)) +    loop = loop->inner; +    new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");    add_referenced_var (new_var);  @@ -1096,6 +1111,7 @@ vect_init_vector (tree stmt, tree vector_var, tree vector_type)  /* Function get_initial_def_for_induction     Input: +   STMT - a stmt that performs an induction operation in the loop.     
IV_PHI - the initial value of the induction variable     Output: @@ -1114,8 +1130,8 @@ get_initial_def_for_induction (tree iv_phi)    tree vectype = get_vectype_for_scalar_type (scalar_type);    int nunits =  TYPE_VECTOR_SUBPARTS (vectype);    edge pe = loop_preheader_edge (loop); +  struct loop *iv_loop;    basic_block new_bb; -  block_stmt_iterator bsi;    tree vec, vec_init, vec_step, t;    tree access_fn;    tree new_var; @@ -1129,8 +1145,13 @@ get_initial_def_for_induction (tree iv_phi)    int ncopies = vf / nunits;    tree expr;    stmt_vec_info phi_info = vinfo_for_stmt (iv_phi); +  bool nested_in_vect_loop = false;    tree stmts; -  tree stmt = NULL_TREE; +  imm_use_iterator imm_iter; +  use_operand_p use_p; +  tree exit_phi; +  edge latch_e; +  tree loop_arg;    block_stmt_iterator si;    basic_block bb = bb_for_stmt (iv_phi); @@ -1139,65 +1160,107 @@ get_initial_def_for_induction (tree iv_phi)    /* Find the first insertion point in the BB.  */    si = bsi_after_labels (bb); -  stmt = bsi_stmt (si); -  access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi)); +  if (INTEGRAL_TYPE_P (scalar_type)) +    step_expr = build_int_cst (scalar_type, 0); +  else +    step_expr = build_real (scalar_type, dconst0); + +  /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  
*/ +  if (nested_in_vect_loop_p (loop, iv_phi)) +    { +      nested_in_vect_loop = true; +      iv_loop = loop->inner; +    } +  else +    iv_loop = loop; +  gcc_assert (iv_loop == (bb_for_stmt (iv_phi))->loop_father); + +  latch_e = loop_latch_edge (iv_loop); +  loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e); + +  access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));    gcc_assert (access_fn); -  ok = vect_is_simple_iv_evolution (loop->num, access_fn, -				    &init_expr, &step_expr); +  ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn, +                                  &init_expr, &step_expr);    gcc_assert (ok); +  pe = loop_preheader_edge (iv_loop);    /* Create the vector that holds the initial_value of the induction.  */ -  new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_"); -  add_referenced_var (new_var); - -  new_name = force_gimple_operand (init_expr, &stmts, false, new_var); -  if (stmts) +  if (nested_in_vect_loop)      { -      new_bb = bsi_insert_on_edge_immediate (pe, stmts); -      gcc_assert (!new_bb); +      /* iv_loop is nested in the loop to be vectorized.  init_expr had already +	 been created during vectorization of previous stmts; We obtain it from +	 the STMT_VINFO_VEC_STMT of the defining stmt. */ +      tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop)); +      vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);      } - -  t = NULL_TREE; -  t = tree_cons (NULL_TREE, new_name, t); -  for (i = 1; i < nunits; i++) +  else      { -      tree tmp; +      /* iv_loop is the loop to be vectorized. 
Create: +	 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */ +      new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_"); +      add_referenced_var (new_var); -      /* Create: new_name = new_name + step_expr  */ -      tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr); -      init_stmt = build_gimple_modify_stmt (new_var, tmp); -      new_name = make_ssa_name (new_var, init_stmt); -      GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name; +      new_name = force_gimple_operand (init_expr, &stmts, false, new_var); +      if (stmts) +	{ +	  new_bb = bsi_insert_on_edge_immediate (pe, stmts); +	  gcc_assert (!new_bb); +	} -      new_bb = bsi_insert_on_edge_immediate (pe, init_stmt); -      gcc_assert (!new_bb); +      t = NULL_TREE; +      t = tree_cons (NULL_TREE, init_expr, t); +      for (i = 1; i < nunits; i++) +	{ +	  tree tmp; -      if (vect_print_dump_info (REPORT_DETAILS)) -        { -          fprintf (vect_dump, "created new init_stmt: "); -          print_generic_expr (vect_dump, init_stmt, TDF_SLIM); -        } -      t = tree_cons (NULL_TREE, new_name, t); +	  /* Create: new_name_i = new_name + step_expr  */ +	  tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr); +	  init_stmt = build_gimple_modify_stmt (new_var, tmp); +	  new_name = make_ssa_name (new_var, init_stmt); +	  GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name; + +	  new_bb = bsi_insert_on_edge_immediate (pe, init_stmt); +	  gcc_assert (!new_bb); + +	  if (vect_print_dump_info (REPORT_DETAILS)) +	    { +	      fprintf (vect_dump, "created new init_stmt: "); +	      print_generic_expr (vect_dump, init_stmt, TDF_SLIM); +	    } +	  t = tree_cons (NULL_TREE, new_name, t); +	} +      /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1]  */ +      vec = build_constructor_from_list (vectype, nreverse (t)); +      vec_init = vect_init_vector (iv_phi, vec, vectype);      } -  vec = build_constructor_from_list (vectype, nreverse 
(t)); -  vec_init = vect_init_vector (stmt, vec, vectype);    /* Create the vector that holds the step of the induction.  */ -  expr = build_int_cst (scalar_type, vf); -  new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr); +  if (nested_in_vect_loop) +    /* iv_loop is nested in the loop to be vectorized. Generate: +       vec_step = [S, S, S, S]  */ +    new_name = step_expr; +  else +    { +      /* iv_loop is the loop to be vectorized. Generate: +	  vec_step = [VF*S, VF*S, VF*S, VF*S]  */ +      expr = build_int_cst (scalar_type, vf); +      new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr); +    } +    t = NULL_TREE;    for (i = 0; i < nunits; i++)      t = tree_cons (NULL_TREE, unshare_expr (new_name), t);    vec = build_constructor_from_list (vectype, t); -  vec_step = vect_init_vector (stmt, vec, vectype); +  vec_step = vect_init_vector (iv_phi, vec, vectype);    /* Create the following def-use cycle:       loop prolog: -         vec_init = [X, X+S, X+2*S, X+3*S] -	 vec_step = [VF*S, VF*S, VF*S, VF*S] +         vec_init = ... +	 vec_step = ...       loop:           vec_iv = PHI <vec_init, vec_loop>           ... @@ -1208,7 +1271,7 @@ get_initial_def_for_induction (tree iv_phi)    /* Create the induction-phi that defines the induction-operand.  
*/    vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");    add_referenced_var (vec_dest); -  induction_phi = create_phi_node (vec_dest, loop->header); +  induction_phi = create_phi_node (vec_dest, iv_loop->header);    set_stmt_info (get_stmt_ann (induction_phi),                   new_stmt_vec_info (induction_phi, loop_vinfo));    induc_def = PHI_RESULT (induction_phi); @@ -1219,15 +1282,16 @@ get_initial_def_for_induction (tree iv_phi)  					       induc_def, vec_step));    vec_def = make_ssa_name (vec_dest, new_stmt);    GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def; -  bsi = bsi_for_stmt (stmt); -  vect_finish_stmt_generation (stmt, new_stmt, &bsi); +  bsi_insert_before (&si, new_stmt, BSI_SAME_STMT); +  set_stmt_info (get_stmt_ann (new_stmt), +		 new_stmt_vec_info (new_stmt, loop_vinfo));    /* Set the arguments of the phi node:  */ -  add_phi_arg (induction_phi, vec_init, loop_preheader_edge (loop)); -  add_phi_arg (induction_phi, vec_def, loop_latch_edge (loop)); +  add_phi_arg (induction_phi, vec_init, pe); +  add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop)); -  /* In case the vectorization factor (VF) is bigger than the number +  /* In case that vectorization factor (VF) is bigger than the number       of elements that we can fit in a vectype (nunits), we have to generate       more than one vector stmt - i.e - we need to "unroll" the       vector stmt by a factor VF/nunits.  For more details see documentation @@ -1236,6 +1300,8 @@ get_initial_def_for_induction (tree iv_phi)    if (ncopies > 1)      {        stmt_vec_info prev_stmt_vinfo; +      /* FORNOW. This restriction should be relaxed.  */ +      gcc_assert (!nested_in_vect_loop);        /* Create the vector that holds the step of the induction.  
*/        expr = build_int_cst (scalar_type, nunits); @@ -1244,7 +1310,7 @@ get_initial_def_for_induction (tree iv_phi)        for (i = 0; i < nunits; i++)  	t = tree_cons (NULL_TREE, unshare_expr (new_name), t);        vec = build_constructor_from_list (vectype, t); -      vec_step = vect_init_vector (stmt, vec, vectype); +      vec_step = vect_init_vector (iv_phi, vec, vectype);        vec_def = induc_def;        prev_stmt_vinfo = vinfo_for_stmt (induction_phi); @@ -1252,19 +1318,50 @@ get_initial_def_for_induction (tree iv_phi)  	{  	  tree tmp; -	  /* vec_i = vec_prev + vec_{step*nunits}  */ +	  /* vec_i = vec_prev + vec_step  */  	  tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);  	  new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);  	  vec_def = make_ssa_name (vec_dest, new_stmt);  	  GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def; -	  bsi = bsi_for_stmt (stmt); -	  vect_finish_stmt_generation (stmt, new_stmt, &bsi); - +	  bsi_insert_before (&si, new_stmt, BSI_SAME_STMT); +	  set_stmt_info (get_stmt_ann (new_stmt), +			 new_stmt_vec_info (new_stmt, loop_vinfo));  	  STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;  	  prev_stmt_vinfo = vinfo_for_stmt (new_stmt);   	}      } +  if (nested_in_vect_loop) +    { +      /* Find the loop-closed exit-phi of the induction, and record +         the final vector of induction results:  */ +      exit_phi = NULL; +      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg) +        { +	  if (!flow_bb_inside_loop_p (iv_loop, bb_for_stmt (USE_STMT (use_p)))) +	    { +	      exit_phi = USE_STMT (use_p); +	      break; +	    } +        } +      if (exit_phi)  +	{ +	  stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi); +	  /* FORNOW. Currently not supporting the case that an inner-loop induction +	     is not used in the outer-loop (i.e. only outside the outer-loop).  
*/ +	  gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo) +		      && !STMT_VINFO_LIVE_P (stmt_vinfo)); + +	  STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt; +	  if (vect_print_dump_info (REPORT_DETAILS)) +	    { +	      fprintf (vect_dump, "vector of inductions after inner-loop:"); +	      print_generic_expr (vect_dump, new_stmt, TDF_SLIM); +	    } +	} +    } + +    if (vect_print_dump_info (REPORT_DETAILS))      {        fprintf (vect_dump, "transform induction: created def-use cycle:"); @@ -1300,7 +1397,6 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)    tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);    int nunits = TYPE_VECTOR_SUBPARTS (vectype);    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); -  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);    tree vec_inv;    tree vec_cst;    tree t = NULL_TREE; @@ -1386,14 +1482,20 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)          def_stmt_info = vinfo_for_stmt (def_stmt);          vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);          gcc_assert (vec_stmt); -        vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0); +	if (TREE_CODE (vec_stmt) == PHI_NODE) +	  vec_oprnd = PHI_RESULT (vec_stmt); +	else +	  vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);          return vec_oprnd;        }      /* Case 4: operand is defined by a loop header phi - reduction  */      case vect_reduction_def:        { +	struct loop *loop; +          gcc_assert (TREE_CODE (def_stmt) == PHI_NODE); +	loop = (bb_for_stmt (def_stmt))->loop_father;           /* Get the def before the loop  */          op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop)); @@ -1405,8 +1507,12 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)        {  	gcc_assert (TREE_CODE (def_stmt) == PHI_NODE); -	/* Get the def before the loop  */ -	return get_initial_def_for_induction (def_stmt); +        /* Get the def from the vectorized stmt.  
*/ +        def_stmt_info = vinfo_for_stmt (def_stmt); +        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info); +        gcc_assert (vec_stmt && (TREE_CODE (vec_stmt) == PHI_NODE)); +        vec_oprnd = PHI_RESULT (vec_stmt); +        return vec_oprnd;        }      default: @@ -1487,7 +1593,6 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)    vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);    gcc_assert (vec_stmt_for_operand);    vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0); -    return vec_oprnd;  } @@ -1503,7 +1608,11 @@ vect_finish_stmt_generation (tree stmt, tree vec_stmt,    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); +  gcc_assert (stmt == bsi_stmt (*bsi)); +  gcc_assert (TREE_CODE (stmt) != LABEL_EXPR); +    bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT); +    set_stmt_info (get_stmt_ann (vec_stmt),   		 new_stmt_vec_info (vec_stmt, loop_vinfo));  @@ -1571,6 +1680,8 @@ static tree  get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)  {    stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); +  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); +  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);    tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);    int nunits =  TYPE_VECTOR_SUBPARTS (vectype);    enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)); @@ -1581,8 +1692,14 @@ get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)    tree t = NULL_TREE;    int i;    tree vector_type; +  bool nested_in_vect_loop = false;     gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)); +  if (nested_in_vect_loop_p (loop, stmt)) +    nested_in_vect_loop = true; +  else +    gcc_assert (loop == (bb_for_stmt (stmt))->loop_father); +    vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);    switch (code) @@ -1590,7 +1707,10 @@ get_initial_def_for_reduction 
(tree stmt, tree init_val, tree *adjustment_def)    case WIDEN_SUM_EXPR:    case DOT_PROD_EXPR:    case PLUS_EXPR: -    *adjustment_def = init_val; +      if (nested_in_vect_loop) +	*adjustment_def = vecdef; +      else +	*adjustment_def = init_val;      /* Create a vector of zeros for init_def.  */      if (INTEGRAL_TYPE_P (type))        def_for_init = build_int_cst (type, 0); @@ -1679,24 +1799,31 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,    tree new_phi;    block_stmt_iterator exit_bsi;    tree vec_dest; -  tree new_temp; +  tree new_temp = NULL_TREE;    tree new_name; -  tree epilog_stmt; -  tree new_scalar_dest, exit_phi; +  tree epilog_stmt = NULL_TREE; +  tree new_scalar_dest, exit_phi, new_dest;    tree bitsize, bitpos, bytesize;     enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)); -  tree scalar_initial_def; +  tree adjustment_def;    tree vec_initial_def;    tree orig_name;    imm_use_iterator imm_iter;    use_operand_p use_p; -  bool extract_scalar_result; -  tree reduction_op; +  bool extract_scalar_result = false; +  tree reduction_op, expr;    tree orig_stmt;    tree use_stmt;    tree operation = GIMPLE_STMT_OPERAND (stmt, 1); +  bool nested_in_vect_loop = false;    int op_type; +  if (nested_in_vect_loop_p (loop, stmt)) +    { +      loop = loop->inner; +      nested_in_vect_loop = true; +    } +      op_type = TREE_OPERAND_LENGTH (operation);    reduction_op = TREE_OPERAND (operation, op_type-1);    vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op)); @@ -1709,7 +1836,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,       the scalar def before the loop, that defines the initial value       of the reduction variable.  
*/    vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt, -						  &scalar_initial_def); +						  &adjustment_def);    add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));    /* 1.2 set the loop-latch arg for the reduction-phi:  */ @@ -1788,6 +1915,15 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,    bitsize = TYPE_SIZE (scalar_type);    bytesize = TYPE_SIZE_UNIT (scalar_type); + +  /* In case this is a reduction in an inner-loop while vectorizing an outer +     loop - we don't need to extract a single scalar result at the end of the +     inner-loop.  The final vector of partial results will be used in the +     vectorized outer-loop, or reduced to a scalar result at the end of the +     outer-loop.  */ +  if (nested_in_vect_loop) +    goto vect_finalize_reduction; +    /* 2.3 Create the reduction code, using one of the three schemes described           above.  */ @@ -1934,6 +2070,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,      {        tree rhs; +      gcc_assert (!nested_in_vect_loop);        if (vect_print_dump_info (REPORT_DETAILS))  	fprintf (vect_dump, "extract scalar result"); @@ -1952,25 +2089,42 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,        bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);      } -  /* 2.4 Adjust the final result by the initial value of the reduction +vect_finalize_reduction: + +  /* 2.5 Adjust the final result by the initial value of the reduction  	 variable. (When such adjustment is not needed, then -	 'scalar_initial_def' is zero). +	 'adjustment_def' is zero).  
For example, if code is PLUS we create: +	 new_temp = loop_exit_def + adjustment_def  */ -	 Create:  -	 s_out4 = scalar_expr <s_out3, scalar_initial_def>  */ -   -  if (scalar_initial_def) +  if (adjustment_def)      { -      tree tmp = build2 (code, scalar_type, new_temp, scalar_initial_def); -      epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp); -      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); +      if (nested_in_vect_loop) +	{ +	  gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE); +	  expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def); +	  new_dest = vect_create_destination_var (scalar_dest, vectype); +	} +      else +	{ +	  gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE); +	  expr = build2 (code, scalar_type, new_temp, adjustment_def); +	  new_dest = vect_create_destination_var (scalar_dest, scalar_type); +	} +      epilog_stmt = build_gimple_modify_stmt (new_dest, expr); +      new_temp = make_ssa_name (new_dest, epilog_stmt);        GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp; +#if 0 +      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); +#else        bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT); +#endif      } -  /* 2.6 Replace uses of s_out0 with uses of s_out3  */ -  /* Find the loop-closed-use at the loop exit of the original scalar result.   +  /* 2.6  Handle the loop-exit phi  */ + +  /* Replace uses of s_out0 with uses of s_out3: +     Find the loop-closed-use at the loop exit of the original scalar result.       (The reduction result is expected to have two immediate uses - one at the        latch block, and one at the loop exit).  */    exit_phi = NULL; @@ -1984,6 +2138,29 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt,      }    /* We expect to have found an exit_phi because of loop-closed-ssa form.  
*/    gcc_assert (exit_phi); + +  if (nested_in_vect_loop) +    { +      stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi); + +      /* FORNOW. Currently not supporting the case that an inner-loop reduction +	 is not used in the outer-loop (but only outside the outer-loop).  */ +      gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)  +		  && !STMT_VINFO_LIVE_P (stmt_vinfo)); + +      epilog_stmt = adjustment_def ? epilog_stmt :  new_phi; +      STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt; +      set_stmt_info (get_stmt_ann (epilog_stmt), +                     new_stmt_vec_info (epilog_stmt, loop_vinfo)); + +      if (vect_print_dump_info (REPORT_DETAILS)) +        { +          fprintf (vect_dump, "vector of partial results after inner-loop:"); +          print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); +        } +      return; +    } +    /* Replace the uses:  */    orig_name = PHI_RESULT (exit_phi);    FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) @@ -2065,15 +2242,30 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)    tree new_stmt = NULL_TREE;    int j; +  if (nested_in_vect_loop_p (loop, stmt)) +    { +      loop = loop->inner; +      /* FORNOW. This restriction should be relaxed.  */ +      if (ncopies > 1) +	{ +	  if (vect_print_dump_info (REPORT_DETAILS)) +	    fprintf (vect_dump, "multiple types in nested loop."); +	  return false; +	} +    } +    gcc_assert (ncopies >= 1);    /* 1. Is vectorizable reduction?  */    /* Not supportable if the reduction variable is used in the loop.  */ -  if (STMT_VINFO_RELEVANT_P (stmt_info)) +  if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)      return false; -  if (!STMT_VINFO_LIVE_P (stmt_info)) +  /* Reductions that are not used even in an enclosing outer-loop, +     are expected to be "live" (used out of the loop).  
*/ +  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop +      && !STMT_VINFO_LIVE_P (stmt_info))      return false;    /* Make sure it was already recognized as a reduction computation.  */ @@ -2130,9 +2322,9 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)    gcc_assert (dt == vect_reduction_def);    gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);    if (orig_stmt)  -    gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt)); +    gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));    else -    gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt)); +    gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));    if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))      return false; @@ -2357,6 +2549,7 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)    int nunits_in;    int nunits_out;    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); +  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);    tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;    enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};    tree new_stmt; @@ -2466,6 +2659,14 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)       needs to be generated.  */    gcc_assert (ncopies >= 1); +  /* FORNOW. This restriction should be relaxed.  */ +  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) +    { +      if (vect_print_dump_info (REPORT_DETAILS)) +      fprintf (vect_dump, "multiple types in nested loop."); +      return false; +    } +    if (!vec_stmt) /* transformation not required.  */      {        STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; @@ -2480,6 +2681,14 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)    if (vect_print_dump_info (REPORT_DETAILS))      fprintf (vect_dump, "transform operation."); +  /* FORNOW. This restriction should be relaxed.  
*/ +  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) +    { +      if (vect_print_dump_info (REPORT_DETAILS)) +        fprintf (vect_dump, "multiple types in nested loop."); +      return false; +    } +    /* Handle def.  */    scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);    vec_dest = vect_create_destination_var (scalar_dest, vectype_out); @@ -2671,6 +2880,7 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi,    tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); +  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);    enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;    tree decl1 = NULL_TREE, decl2 = NULL_TREE;    tree new_temp; @@ -2752,6 +2962,14 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi,       needs to be generated.  */    gcc_assert (ncopies >= 1); +  /* FORNOW. This restriction should be relaxed.  */ +  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) +    { +      if (vect_print_dump_info (REPORT_DETAILS)) +      fprintf (vect_dump, "multiple types in nested loop."); +      return false; +    } +    /* Check the operands of the operation.  */    if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))      { @@ -3093,6 +3311,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);    tree vectype = STMT_VINFO_VECTYPE (stmt_info);    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); +  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);    enum tree_code code;    enum machine_mode vec_mode;    tree new_temp; @@ -3111,6 +3330,13 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)    int j;    gcc_assert (ncopies >= 1); +  /* FORNOW. This restriction should be relaxed.  
*/ +  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) +    { +      if (vect_print_dump_info (REPORT_DETAILS)) +        fprintf (vect_dump, "multiple types in nested loop."); +      return false; +    }    if (!STMT_VINFO_RELEVANT_P (stmt_info))      return false; @@ -3373,6 +3599,7 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,    tree vec_oprnd0=NULL, vec_oprnd1=NULL;    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); +  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);    enum tree_code code, code1 = ERROR_MARK;    tree new_temp;    tree def, def_stmt; @@ -3425,6 +3652,13 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;    gcc_assert (ncopies >= 1); +  /* FORNOW. This restriction should be relaxed.  */ +  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) +    { +      if (vect_print_dump_info (REPORT_DETAILS)) +        fprintf (vect_dump, "multiple types in nested loop."); +      return false; +    }    if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))  	  && INTEGRAL_TYPE_P (TREE_TYPE (op0))) @@ -3522,6 +3756,7 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,    tree vec_oprnd0=NULL, vec_oprnd1=NULL;    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); +  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);    enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;    tree decl1 = NULL_TREE, decl2 = NULL_TREE;    int op_type;  @@ -3575,6 +3810,13 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;    gcc_assert (ncopies >= 1); +  /* FORNOW. This restriction should be relaxed.  
*/ +  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) +    { +      if (vect_print_dump_info (REPORT_DETAILS)) +        fprintf (vect_dump, "multiple types in nested loop."); +      return false; +    }    if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))  	  && INTEGRAL_TYPE_P (TREE_TYPE (op0))) @@ -3867,6 +4109,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)    struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;    tree vectype = STMT_VINFO_VECTYPE (stmt_info);    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); +  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);    enum machine_mode vec_mode;    tree dummy;    enum dr_alignment_support alignment_support_cheme; @@ -3882,6 +4125,13 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)    unsigned int group_size, i;    VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;    gcc_assert (ncopies >= 1); +  /* FORNOW. This restriction should be relaxed.  */ +  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) +    { +      if (vect_print_dump_info (REPORT_DETAILS)) +        fprintf (vect_dump, "multiple types in nested loop."); +      return false; +    }    if (!STMT_VINFO_RELEVANT_P (stmt_info))      return false; @@ -4517,6 +4767,15 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)    bool strided_load = false;    tree first_stmt; +  gcc_assert (ncopies >= 1); +  /* FORNOW. This restriction should be relaxed.  
*/ +  if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) +    { +      if (vect_print_dump_info (REPORT_DETAILS)) +        fprintf (vect_dump, "multiple types in nested loop."); +      return false; +    } +    if (!STMT_VINFO_RELEVANT_P (stmt_info))      return false; @@ -4812,6 +5071,7 @@ vectorizable_live_operation (tree stmt,    tree operation;    stmt_vec_info stmt_info = vinfo_for_stmt (stmt);    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); +  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);    int i;    int op_type;    tree op; @@ -4829,6 +5089,10 @@ vectorizable_live_operation (tree stmt,    if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)      return false; +  /* FORNOW. CHECKME. */ +  if (nested_in_vect_loop_p (loop, stmt)) +    return false; +    operation = GIMPLE_STMT_OPERAND (stmt, 1);    op_type = TREE_OPERAND_LENGTH (operation); @@ -6124,8 +6388,18 @@ vect_transform_loop (loop_vec_info loop_vinfo)  	      fprintf (vect_dump, "------>vectorizing statement: ");  	      print_generic_expr (vect_dump, stmt, TDF_SLIM);  	    }	 +  	  stmt_info = vinfo_for_stmt (stmt); -	  gcc_assert (stmt_info); + +	  /* vector stmts created in the outer-loop during vectorization of +	     stmts in an inner-loop may not have a stmt_info, and do not +	     need to be vectorized.  
*/ +	  if (!stmt_info) +	    { +	      bsi_next (&si); +	      continue; +	    } +  	  if (!STMT_VINFO_RELEVANT_P (stmt_info)  	      && !STMT_VINFO_LIVE_P (stmt_info))  	    { @@ -6197,4 +6471,6 @@ vect_transform_loop (loop_vec_info loop_vinfo)    if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))      fprintf (vect_dump, "LOOP VECTORIZED."); +  if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS)) +    fprintf (vect_dump, "OUTER LOOP VECTORIZED.");  } diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 6dc0c72..20c867c 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -1345,7 +1345,7 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)    STMT_VINFO_IN_PATTERN_P (res) = false;    STMT_VINFO_RELATED_STMT (res) = NULL;    STMT_VINFO_DATA_REF (res) = NULL; -  if (TREE_CODE (stmt) == PHI_NODE) +  if (TREE_CODE (stmt) == PHI_NODE && is_loop_header_bb_p (bb_for_stmt (stmt)))      STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;    else      STMT_VINFO_DEF_TYPE (res) = vect_loop_def; @@ -1364,6 +1364,20 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)  } +/* Function bb_in_loop_p + +   Used as predicate for dfs order traversal of the loop bbs.  */ + +static bool +bb_in_loop_p (const_basic_block bb, const void *data) +{ +  struct loop *loop = (struct loop *)data; +  if (flow_bb_inside_loop_p (loop, bb)) +    return true; +  return false; +} + +  /* Function new_loop_vec_info.     Create and initialize a new loop_vec_info struct for LOOP, as well as @@ -1375,37 +1389,76 @@ new_loop_vec_info (struct loop *loop)    loop_vec_info res;    basic_block *bbs;    block_stmt_iterator si; -  unsigned int i; +  unsigned int i, nbbs;    res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info)); +  LOOP_VINFO_LOOP (res) = loop;    bbs = get_loop_body (loop); -  /* Create stmt_info for all stmts in the loop.  */ +  /* Create/Update stmt_info for all stmts in the loop.  
*/    for (i = 0; i < loop->num_nodes; i++)      {        basic_block bb = bbs[i];        tree phi; -      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) -        { -          stmt_ann_t ann = get_stmt_ann (phi); -          set_stmt_info (ann, new_stmt_vec_info (phi, res)); -        } - -      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) +      /* BBs in a nested inner-loop will have been already processed (because  +	 we will have called vect_analyze_loop_form for any nested inner-loop). +	 Therefore, for stmts in an inner-loop we just want to update the  +	 STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new  +	 loop_info of the outer-loop we are currently considering to vectorize  +	 (instead of the loop_info of the inner-loop). +	 For stmts in other BBs we need to create a stmt_info from scratch.  */ +      if (bb->loop_father != loop)  	{ -	  tree stmt = bsi_stmt (si); -	  stmt_ann_t ann; +	  /* Inner-loop bb.  */ +	  gcc_assert (loop->inner && bb->loop_father == loop->inner); +	  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) +	    { +	      stmt_vec_info stmt_info = vinfo_for_stmt (phi); +	      loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); +	      gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo)); +	      STMT_VINFO_LOOP_VINFO (stmt_info) = res; +	    } +	  for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) +	   { +	      tree stmt = bsi_stmt (si); +	      stmt_vec_info stmt_info = vinfo_for_stmt (stmt); +	      loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); +	      gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo)); +	      STMT_VINFO_LOOP_VINFO (stmt_info) = res; +	   } +	} +      else +	{ +	  /* bb in current nest.  
*/ +	  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) +	    { +	      stmt_ann_t ann = get_stmt_ann (phi); +	      set_stmt_info (ann, new_stmt_vec_info (phi, res)); +	    } -	  ann = stmt_ann (stmt); -	  set_stmt_info (ann, new_stmt_vec_info (stmt, res)); +	  for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) +	    { +	      tree stmt = bsi_stmt (si); +	      stmt_ann_t ann = stmt_ann (stmt); +	      set_stmt_info (ann, new_stmt_vec_info (stmt, res)); +	    }  	}      } -  LOOP_VINFO_LOOP (res) = loop; +  /* CHECKME: We want to visit all BBs before their successors (except for  +     latch blocks, for which this assertion wouldn't hold).  In the simple  +     case of the loop forms we allow, a dfs order of the BBs would be the same  +     as reversed postorder traversal, so we are safe.  */ + +   free (bbs); +   bbs = XCNEWVEC (basic_block, loop->num_nodes); +   nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,  +			      bbs, loop->num_nodes, loop); +   gcc_assert (nbbs == loop->num_nodes); +    LOOP_VINFO_BBS (res) = bbs; -  LOOP_VINFO_EXIT_COND (res) = NULL;    LOOP_VINFO_NITERS (res) = NULL;    LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;    LOOP_VINFO_VECTORIZABLE_P (res) = 0; @@ -1430,7 +1483,7 @@ new_loop_vec_info (struct loop *loop)     stmts in the loop.  
*/  void -destroy_loop_vec_info (loop_vec_info loop_vinfo) +destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)  {    struct loop *loop;    basic_block *bbs; @@ -1446,6 +1499,18 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo)    bbs = LOOP_VINFO_BBS (loop_vinfo);    nbbs = loop->num_nodes; +  if (!clean_stmts) +    { +      free (LOOP_VINFO_BBS (loop_vinfo)); +      free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo)); +      free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo)); +      VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)); + +      free (loop_vinfo); +      loop->aux = NULL; +      return; +    } +    for (j = 0; j < nbbs; j++)      {        basic_block bb = bbs[j]; @@ -1597,7 +1662,6 @@ vect_supportable_dr_alignment (struct data_reference *dr)      return dr_aligned;    /* Possibly unaligned access.  */ -      if (DR_IS_READ (dr))      {        if (optab_handler (vec_realign_load_optab, mode)->insn_code != CODE_FOR_nothing @@ -1718,8 +1782,6 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,      {      case PHI_NODE:        *def = PHI_RESULT (*def_stmt); -      gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def -		  || *dt == vect_invariant_def);        break;      case GIMPLE_MODIFY_STMT: @@ -1760,6 +1822,8 @@ supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,                                  enum tree_code *code1, enum tree_code *code2)  {    stmt_vec_info stmt_info = vinfo_for_stmt (stmt); +  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); +  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);    bool ordered_p;    enum machine_mode vec_mode;    enum insn_code icode1, icode2; @@ -1782,9 +1846,15 @@ supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,       Some targets can take advantage of this and generate more efficient code.       
For example, targets like Altivec, that support widen_mult using a sequence       of {mult_even,mult_odd} generate the following vectors: -        vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].  */ +        vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8]. + +     When vectorizing outer-loops, we execute the inner-loop sequentially +     (each vectorized inner-loop iteration contributes to VF outer-loop  +     iterations in parallel). We therefore don't allow to change the order  +     of the computation in the inner-loop during outer-loop vectorization.  */ -   if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction) +   if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction +       && !nested_in_vect_loop_p (vect_loop, stmt))       ordered_p = false;     else       ordered_p = true; @@ -2008,8 +2078,10 @@ reduction_code_for_scalar_code (enum tree_code code,     Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.  */  tree -vect_is_simple_reduction (struct loop *loop, tree phi) +vect_is_simple_reduction (loop_vec_info loop_info, tree phi)  { +  struct loop *loop = (bb_for_stmt (phi))->loop_father; +  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);    edge latch_e = loop_latch_edge (loop);    tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);    tree def_stmt, def1, def2; @@ -2022,6 +2094,8 @@ vect_is_simple_reduction (struct loop *loop, tree phi)    imm_use_iterator imm_iter;    use_operand_p use_p; +  gcc_assert (loop == vect_loop || flow_loop_nested_p (vect_loop, loop)); +    name = PHI_RESULT (phi);    nloop_uses = 0;    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) @@ -2133,8 +2207,16 @@ vect_is_simple_reduction (struct loop *loop, tree phi)        return NULL_TREE;      } +  /* Generally, when vectorizing a reduction we change the order of the +     computation.  This may change the behavior of the program in some +     cases, so we need to check that this is ok.  
One exception is when  +     vectorizing an outer-loop: the inner-loop is executed sequentially, +     and therefore vectorizing reductions in the inner-loop during  +     outer-loop vectorization is safe.  */ +    /* CHECKME: check for !flag_finite_math_only too?  */ -  if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations) +  if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations +      && !nested_in_vect_loop_p (vect_loop, def_stmt))       {        /* Changing the order of operations changes the semantics.  */        if (vect_print_dump_info (REPORT_DETAILS)) @@ -2144,7 +2226,8 @@ vect_is_simple_reduction (struct loop *loop, tree phi)          }        return NULL_TREE;      } -  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)) +  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type) +	   && !nested_in_vect_loop_p (vect_loop, def_stmt))      {        /* Changing the order of operations changes the semantics.  */        if (vect_print_dump_info (REPORT_DETAILS)) @@ -2183,13 +2266,16 @@ vect_is_simple_reduction (struct loop *loop, tree phi)    /* Check that one def is the reduction def, defined by PHI, -     the other def is either defined in the loop by a GIMPLE_MODIFY_STMT, -     or it's an induction (defined by some phi node).  */ +     the other def is either defined in the loop ("vect_loop_def"), +     or it's an induction (defined by a loop-header phi-node).  
*/    if (def2 == phi        && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))        && (TREE_CODE (def1) == GIMPLE_MODIFY_STMT  -	  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def)) +	  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def +	  || (TREE_CODE (def1) == PHI_NODE  +	      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_loop_def +	      && !is_loop_header_bb_p (bb_for_stmt (def1)))))      {        if (vect_print_dump_info (REPORT_DETAILS))          { @@ -2201,7 +2287,10 @@ vect_is_simple_reduction (struct loop *loop, tree phi)    else if (def1 == phi  	   && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))  	   && (TREE_CODE (def2) == GIMPLE_MODIFY_STMT  -	       || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def)) +	       || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def +	       || (TREE_CODE (def2) == PHI_NODE +		   && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_loop_def +		   && !is_loop_header_bb_p (bb_for_stmt (def2)))))      {        /* Swap operands (just for simplicity - so that the rest of the code  	 can assume that the reduction variable is always the last (second) @@ -2340,7 +2429,7 @@ vectorize_loops (void)        if (!loop)  	continue;        loop_vinfo = loop->aux; -      destroy_loop_vec_info (loop_vinfo); +      destroy_loop_vec_info (loop_vinfo, true);        loop->aux = NULL;      } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index d96b944..e9f208a 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -92,9 +92,6 @@ typedef struct _loop_vec_info {    /* The loop basic blocks.  */    basic_block *bbs; -  /* The loop exit_condition.  */ -  tree exit_cond; -    /* Number of iterations.  */    tree num_iters; @@ -148,7 +145,6 @@ typedef struct _loop_vec_info {  /* Access Functions.  
*/  #define LOOP_VINFO_LOOP(L)            (L)->loop  #define LOOP_VINFO_BBS(L)             (L)->bbs -#define LOOP_VINFO_EXIT_COND(L)       (L)->exit_cond  #define LOOP_VINFO_NITERS(L)          (L)->num_iters  #define LOOP_VINFO_COST_MODEL_MIN_ITERS(L)	(L)->min_profitable_iters  #define LOOP_VINFO_VECTORIZABLE_P(L)  (L)->vectorizable @@ -170,6 +166,19 @@ typedef struct _loop_vec_info {  #define LOOP_VINFO_NITERS_KNOWN_P(L)                     \  NITERS_KNOWN_P((L)->num_iters) +static inline loop_vec_info +loop_vec_info_for_loop (struct loop *loop) +{ +  return (loop_vec_info) loop->aux; +} + +static inline bool +nested_in_vect_loop_p (struct loop *loop, tree stmt) +{ +  return (loop->inner  +          && (loop->inner == (bb_for_stmt (stmt))->loop_father)); +} +  /*-----------------------------------------------------------------*/  /* Info on vectorized defs.                                        */  /*-----------------------------------------------------------------*/ @@ -185,12 +194,15 @@ enum stmt_vec_info_type {    induc_vec_info_type,    type_promotion_vec_info_type,    type_demotion_vec_info_type, -  type_conversion_vec_info_type +  type_conversion_vec_info_type, +  loop_exit_ctrl_vec_info_type  };  /* Indicates whether/how a variable is used in the loop.  */  enum vect_relevant {    vect_unused_in_loop = 0, +  vect_used_in_outer_by_reduction, +  vect_used_in_outer,    /* defs that feed computations that end up (only) in a reduction. These       defs may be used by non-reduction stmts, but eventually, any  @@ -408,6 +420,15 @@ is_pattern_stmt_p (stmt_vec_info stmt_info)    return false;  } +static inline bool +is_loop_header_bb_p (basic_block bb) +{ +  if (bb == (bb->loop_father)->header) +    return true; +  gcc_assert (EDGE_COUNT (bb->preds) == 1); +  return false; +} +  /*-----------------------------------------------------------------*/  /* Info on data references alignment.                              
*/  /*-----------------------------------------------------------------*/ @@ -467,7 +488,7 @@ extern tree get_vectype_for_scalar_type (tree);  extern bool vect_is_simple_use (tree, loop_vec_info, tree *, tree *,  				enum vect_def_type *);  extern bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *); -extern tree vect_is_simple_reduction (struct loop *, tree); +extern tree vect_is_simple_reduction (loop_vec_info, tree);  extern bool vect_can_force_dr_alignment_p (tree, unsigned int);  extern enum dr_alignment_support vect_supportable_dr_alignment    (struct data_reference *); @@ -479,7 +500,7 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree,  /* Creation and deletion of loop and stmt info structs.  */  extern loop_vec_info new_loop_vec_info (struct loop *loop); -extern void destroy_loop_vec_info (loop_vec_info); +extern void destroy_loop_vec_info (loop_vec_info, bool);  extern stmt_vec_info new_stmt_vec_info (tree stmt, loop_vec_info);  | 
