diff options
26 files changed, 937 insertions, 18 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 13c869f..620775c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2007-07-12 Dorit Nuzman <dorit@il.ibm.com> + + * target.h (builtin_vectorization_cost): Add new target builtin. + * target-def.h (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New. + * tree-vectorizer.h (TARG_SCALAR_STMT_COST): New. + (TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST): New. + * tree-vect-analyze.c (vect_analyze_slp_instance): Initialize + uninitialized variables. + * tree-vect-transform.c (cost_for_stmt): New function. + (vect_estimate_min_profitable_iters): Call cost_for_stmt instead of + using cost 1 for all scalar stmts. Be less conservative when + estimating the number of prologue/epilogue iterations. Call + targetm.vectorize.builtin_vectorization_cost. Return + min_profitable_iters-1. + (vect_model_reduction_cost): Use TARG_SCALAR_TO_VEC_COST for + initialization cost instead of TARG_VEC_STMT_COST. Use + TARG_VEC_TO_SCALAR_COST instead of TARG_VEC_STMT_COST for reduction + epilogue code. Fix epilogue cost computation. + * config/spu/spu.c (spu_builtin_vectorization_cost): New. + (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Implement. + * config/spu/spu.h (TARG_COND_BRANCH_COST, TARG_SCALAR_STMT_COST): + (TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST, TARG_VEC_STMT_COST): + (TARG_VEC_TO_SCALAR_COST, TARG_SCALAR_TO_VEC_COST, TARG_VEC_LOAD_COST): + (TARG_VEC_UNALIGNED_LOAD_COST, TARG_VEC_STORE_COST): Define. 
+ 2007-07-12 Richard Guenther <rguenther@suse.de> * gimplify.c (gimplify_conversion): Make sure that the result diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c index 7e28167..f963268 100644 --- a/gcc/config/spu/spu.c +++ b/gcc/config/spu/spu.c @@ -133,6 +133,7 @@ static void spu_encode_section_info (tree, rtx, int); static tree spu_builtin_mul_widen_even (tree); static tree spu_builtin_mul_widen_odd (tree); static tree spu_builtin_mask_for_load (void); +static int spu_builtin_vectorization_cost (bool); extern const char *reg_names[]; rtx spu_compare_op0, spu_compare_op1; @@ -261,6 +262,9 @@ const struct attribute_spec spu_attribute_table[]; #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost + struct gcc_target targetm = TARGET_INITIALIZER; void @@ -5191,6 +5195,21 @@ spu_builtin_mask_for_load (void) return d->fndecl; } +/* Implement targetm.vectorize.builtin_vectorization_cost. */ +static int +spu_builtin_vectorization_cost (bool runtime_test) +{ + /* If the branch of the runtime test is taken - i.e. - the vectorized + version is skipped - this incurs a misprediction cost (because the + vectorized version is expected to be the fall-through). So we subtract + the latency of a mispredicted branch from the costs that are incurred + when the vectorized version is executed. */ + if (runtime_test) + return -19; + else + return 0; +} + void spu_init_expanders (void) { diff --git a/gcc/config/spu/spu.h b/gcc/config/spu/spu.h index 1f15472..b8af6b2 100644 --- a/gcc/config/spu/spu.h +++ b/gcc/config/spu/spu.h @@ -542,6 +542,52 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \ do { if (LOG!=0) fprintf (FILE, "\t.align\t%d\n", (LOG)); } while (0) +/* Model costs for the vectorizer. */ + +/* Cost of conditional branch. 
*/ +#ifndef TARG_COND_BRANCH_COST +#define TARG_COND_BRANCH_COST 6 +#endif + +/* Cost of any scalar operation, excluding load and store. */ +#ifndef TARG_SCALAR_STMT_COST +#define TARG_SCALAR_STMT_COST 1 +#endif + +/* Cost of scalar load. */ +#undef TARG_SCALAR_LOAD_COST +#define TARG_SCALAR_LOAD_COST 2 /* load + rotate */ + +/* Cost of scalar store. */ +#undef TARG_SCALAR_STORE_COST +#define TARG_SCALAR_STORE_COST 10 + +/* Cost of any vector operation, excluding load, store, + or vector to scalar operation. */ +#undef TARG_VEC_STMT_COST +#define TARG_VEC_STMT_COST 1 + +/* Cost of vector to scalar operation. */ +#undef TARG_VEC_TO_SCALAR_COST +#define TARG_VEC_TO_SCALAR_COST 1 + +/* Cost of scalar to vector operation. */ +#undef TARG_SCALAR_TO_VEC_COST +#define TARG_SCALAR_TO_VEC_COST 1 + +/* Cost of aligned vector load. */ +#undef TARG_VEC_LOAD_COST +#define TARG_VEC_LOAD_COST 1 + +/* Cost of misaligned vector load. */ +#undef TARG_VEC_UNALIGNED_LOAD_COST +#define TARG_VEC_UNALIGNED_LOAD_COST 2 + +/* Cost of vector store. */ +#undef TARG_VEC_STORE_COST +#define TARG_VEC_STORE_COST 1 + + /* Misc */ #define CASE_VECTOR_MODE SImode diff --git a/gcc/target-def.h b/gcc/target-def.h index 31cb8f8..8942de7 100644 --- a/gcc/target-def.h +++ b/gcc/target-def.h @@ -356,6 +356,7 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. default_builtin_vectorized_conversion #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0 +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 0 #define TARGET_VECTORIZE \ { \ @@ -363,7 +364,8 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION, \ TARGET_VECTORIZE_BUILTIN_CONVERSION, \ TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \ - TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD \ + TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD, \ + TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ } #define TARGET_DEFAULT_TARGET_FLAGS 0 diff --git a/gcc/target.h b/gcc/target.h index 89ad0df..56c99bf 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -413,6 +413,10 @@ struct gcc_target element-by-element products for the odd elements. */ tree (* builtin_mul_widen_even) (tree); tree (* builtin_mul_widen_odd) (tree); + + /* Returns the cost to be added to the overheads involved with + executing the vectorized version of a loop. */ + int (*builtin_vectorization_cost) (bool); } vectorize; /* The initial value of target_flags. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 22fc219..a74a74f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,27 @@ +2007-07-12 Dorit Nuzman <dorit@il.ibm.com> + + * gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c: Loops now + get vectorized. + * gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c: Loops + now get vectorized. + * gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp: New. + * gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c: New. + * gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c: New. + * gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c: New. + * gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c: New. + * gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c: New. + * gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c: New. + * gcc.dg/vect/costmodel/spu/costmodel-vect-33.c: New. + * gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c: New. + * gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c: New. + * gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c: New. + * gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c: New. + * gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c: New. 
+ * gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c: New. + * gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c: New. + * lib/target-supports.exp (check_effective_target_vect_int_mult): + Add spu. + 2007-07-12 Jakub Jelinek <jakub@redhat.com> PR c++/30854 diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c b/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c index cf5becc..55334fd 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c @@ -46,6 +46,6 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_max } } } */ -/* { dg-final { scan-tree-dump-times "vectorization not profitable" 2 "vect" { xfail vect_no_int_max } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_int_max } } } */ +/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c index cf5becc..55334fd 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c @@ -46,6 +46,6 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_max } } } */ -/* { dg-final { scan-tree-dump-times "vectorization not profitable" 2 "vect" { xfail vect_no_int_max } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_int_max } } } */ +/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c new file mode 100644 index 0000000..9347d05 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c @@ -0,0 +1,39 @@ +/* { dg-require-effective-target vect_float } */ + +#include <stdlib.h> +#include "../../tree-vect.h" + +void interp_pitch(float *exc, float *interp, int pitch, int len) +{ + int i,k; + int maxj; + + maxj=3; + for (i=0;i<len;i++) + { + float tmp = 0; + for (k=0;k<7;k++) + { + tmp += exc[i-pitch+k+maxj-6]; + } + interp[i] = tmp; + } +} + +int main() +{ + float *exc = calloc(126,sizeof(float)); + float *interp = calloc(80,sizeof(float)); + int pitch = -35; + + check_vect (); + + interp_pitch(exc, interp, pitch, 80); + free(exc); + free(interp); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c new file mode 100644 index 0000000..272b3f0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c @@ -0,0 +1,51 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "../../tree-vect.h" + +#define N 32 + +struct t{ + int k[N]; + int l; +}; + +struct s{ + char a; /* aligned */ + char b[N-1]; /* unaligned (offset 1B) */ + char c[N]; /* aligned (offset NB) */ + struct t d; /* aligned (offset 2NB) */ + struct t e; /* unaligned (offset 2N+4N+4 B) */ +}; + +int main1 () +{ + int i; + struct s tmp; + + /* unaligned */ + for (i = 0; i < N/2; i++) + { + tmp.b[i] = 5; + } + + /* check results: */ + for (i = 0; i <N/2; i++) + { + if (tmp.b[i] != 5) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times 
"vectorized 1 loops" 0 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c new file mode 100644 index 0000000..b3224f9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c @@ -0,0 +1,50 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "../../tree-vect.h" + +#define N 32 + +struct t{ + int k[N]; + int l; +}; + +struct s{ + char a; /* aligned */ + char b[N-1]; /* unaligned (offset 1B) */ + char c[N]; /* aligned (offset NB) */ + struct t d; /* aligned (offset 2NB) */ + struct t e; /* unaligned (offset 2N+4N+4 B) */ +}; + +int main1 () +{ + int i; + struct s tmp; + + /* aligned */ + for (i = 0; i < N/2; i++) + { + tmp.c[i] = 6; + } + + /* check results: */ + for (i = 0; i <N/2; i++) + { + if (tmp.c[i] != 6) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c new file mode 100644 index 0000000..9dcd09a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c @@ -0,0 +1,50 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "../../tree-vect.h" + +#define N 32 + +struct t{ + int k[N]; + int l; +}; + +struct s{ + char a; /* aligned */ + char b[N-1]; /* unaligned (offset 1B) */ + char c[N]; /* aligned (offset NB) */ + struct t d; /* aligned (offset 2NB) */ + struct t e; /* unaligned (offset 2N+4N+4 B) */ +}; + +int main1 () +{ + int i; + struct s tmp; + + /* aligned */ + for (i = 0; i < N/2; i++) + { + tmp.d.k[i] = 7; + } + + /* check results: */ + for (i = 0; i <N/2; i++) + { + if (tmp.d.k[i] != 7) + abort (); 
+ } + + return 0; +} + +int main (void) +{ + check_vect (); + + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c new file mode 100644 index 0000000..736804f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c @@ -0,0 +1,51 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "../../tree-vect.h" + +#define N 32 + +struct t{ + int k[N]; + int l; +}; + +struct s{ + char a; /* aligned */ + char b[N-1]; /* unaligned (offset 1B) */ + char c[N]; /* aligned (offset NB) */ + struct t d; /* aligned (offset 2NB) */ + struct t e; /* unaligned (offset 2N+4N+4 B) */ +}; + +int main1 () +{ + int i; + struct s tmp; + + /* unaligned */ + for (i = 0; i < N/2; i++) + { + tmp.e.k[i] = 8; + } + + /* check results: */ + for (i = 0; i <N/2; i++) + { + if (tmp.e.k[i] != 8) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-33.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-33.c new file mode 100644 index 0000000..df92ceb --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-33.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "../../tree-vect.h" + +#define N 16 +struct test { + char ca[N]; +}; + +extern struct test s; + +int main1 () +{ + int i; + + for (i = 0; i < N; i++) + { + s.ca[i] = 5; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (s.ca[i] != 5) + abort 
(); + } + + return 0; +} + +int main (void) +{ + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c new file mode 100644 index 0000000..d0d40ac --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c @@ -0,0 +1,49 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "../../tree-vect.h" + +#define N 32 + +struct s{ + int m; + int n[N][N][N]; +}; + +struct test1{ + struct s a; /* array a.n is unaligned */ + int b; + int c; + struct s e; /* array e.n is aligned */ +}; + +int main1 () +{ + int i,j; + struct test1 tmp1; + + /* 1. unaligned */ + for (i = 0; i < N; i++) + { + tmp1.a.n[1][2][i] = 5; + } + + /* check results: */ + for (i = 0; i <N; i++) + { + if (tmp1.a.n[1][2][i] != 5) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c new file mode 100644 index 0000000..4e52af8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c @@ -0,0 +1,49 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "../../tree-vect.h" + +#define N 32 + +struct s{ + int m; + int n[N][N][N]; +}; + +struct test1{ + struct s a; /* array a.n is unaligned */ + int b; + int c; + struct s e; /* array e.n is aligned */ +}; + +int main1 () +{ + int i,j; + struct test1 tmp1; + + /* 2. 
aligned */ + for (i = 3; i < N-1; i++) + { + tmp1.a.n[1][2][i] = 6; + } + + /* check results: */ + for (i = 3; i < N-1; i++) + { + if (tmp1.a.n[1][2][i] != 6) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c new file mode 100644 index 0000000..58c5e9f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c @@ -0,0 +1,49 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "../../tree-vect.h" + +#define N 32 + +struct s{ + int m; + int n[N][N][N]; +}; + +struct test1{ + struct s a; /* array a.n is unaligned */ + int b; + int c; + struct s e; /* array e.n is aligned */ +}; + +int main1 () +{ + int i,j; + struct test1 tmp1; + + /* 3. 
aligned */ + for (i = 0; i < N; i++) + { + tmp1.e.n[1][2][i] = 7; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (tmp1.e.n[1][2][i] != 7) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c new file mode 100644 index 0000000..9cec936 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c @@ -0,0 +1,50 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "../../tree-vect.h" + +#define N 20 + +struct s{ + int m; + int n[N][N][N]; +}; + +struct test1{ + struct s a; /* array a.n is unaligned */ + int b; + int c; + struct s e; /* array e.n is aligned */ +}; + +int main1 () +{ + int i,j; + struct test1 tmp1; + + /* 4. unaligned */ + for (i = 3; i < N-3; i++) + { + tmp1.e.n[1][2][i] = 8; + } + + /* check results: */ + for (i = 3; i <N-3; i++) + { + if (tmp1.e.n[1][2][i] != 8) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + return main1 (); +} + +/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c new file mode 100644 index 0000000..41fe3ae --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c @@ -0,0 +1,47 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "../../tree-vect.h" + +#define N 8 +#define OFF 4 + +/* Check handling of accesses for which the "initial condition" - + the expression that represents the first location 
accessed - is + more involved than just an ssa_name. */ + +int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + +int main1 (int *pib) +{ + int i; + int ia[N+OFF]; + int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + + for (i = OFF; i < N; i++) + { + ia[i] = pib[i - OFF]; + } + + + /* check results: */ + for (i = OFF; i < N; i++) + { + if (ia[i] != pib[i - OFF]) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + main1 (&ib[OFF]); + return 0; +} + + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c new file mode 100644 index 0000000..71f3977 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c @@ -0,0 +1,47 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "../../tree-vect.h" + +#define N 8 +#define OFF 4 + +/* Check handling of accesses for which the "initial condition" - + the expression that represents the first location accessed - is + more involved than just an ssa_name. 
*/ + +int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + +int main1 (int *pib) +{ + int i; + int ia[N+OFF]; + int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + + for (i = OFF; i < N; i++) + { + pib[i - OFF] = ic[i]; + } + + + /* check results: */ + for (i = OFF; i < N; i++) + { + if (pib[i - OFF] != ic[i]) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + main1 (&ib[OFF]); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c new file mode 100644 index 0000000..f6127ba --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c @@ -0,0 +1,47 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "../../tree-vect.h" + +#define N 8 +#define OFF 4 + +/* Check handling of accesses for which the "initial condition" - + the expression that represents the first location accessed - is + more involved than just an ssa_name. 
*/ + +int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + +int main1 (int *pib) +{ + int i; + int ia[N+OFF]; + int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + + for (i = OFF; i < N; i++) + { + ia[i] = ic[i - OFF]; + } + + + /* check results: */ + for (i = OFF; i < N; i++) + { + if (ia[i] != ic[i - OFF]) + abort (); + } + + return 0; +} + +int main (void) +{ + check_vect (); + + main1 (&ib[OFF]); + return 0; +} + + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c new file mode 100644 index 0000000..e01b67e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c @@ -0,0 +1,38 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "../../tree-vect.h" + +#define N 26 +int a[N]; + +int main1 (int X) +{ + int s = X; + int i; + + /* vectorization of reduction with induction. */ + for (i = 0; i < N; i++) + s += (i + a[i]); + + return s; +} + +int main (void) +{ + int s, i; + check_vect (); + + for (i = 0; i < N; i++) + a[i] = 2*i; + + s = main1 (3); + if (s != 978) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_int_mult } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target {! vect_int_mult } } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp b/gcc/testsuite/gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp new file mode 100644 index 0000000..fe7439a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp @@ -0,0 +1,69 @@ +# Copyright (C) 1997, 2004, 2005, 2006 Free Software Foundation, Inc. 
+ +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +# GCC testsuite that uses the `dg.exp' driver. + +# Load support procs. +load_lib gcc-dg.exp + +# Exit immediately if this isn't an SPU target. +if { ![istarget spu*-*-*] } then { + return +} + + +# Set up flags used for tests that don't specify options. +set DEFAULT_VECTCFLAGS "" + +# These flags are used for all targets. +lappend DEFAULT_VECTCFLAGS "-O2" "-ftree-vectorize" "-fvect-cost-model" + +# If the target system supports vector instructions, the default action +# for a test is 'run', otherwise it's 'compile'. Save current default. +# Executing vector instructions on a system without hardware vector support +# is also disabled by a call to check_vect, but disabling execution here is +# more efficient. +global dg-do-what-default +set save-dg-do-what-default ${dg-do-what-default} + +set dg-do-what-default run + +# Initialize `dg'. +dg-init + +lappend DEFAULT_VECTCFLAGS "-fdump-tree-vect-details" + +# Main loop. 
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-pr*.\[cS\]]] \ + "" $DEFAULT_VECTCFLAGS +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-vect-*.\[cS\]]] \ + "" $DEFAULT_VECTCFLAGS + +#### Tests with special options +global SAVED_DEFAULT_VECTCFLAGS +set SAVED_DEFAULT_VECTCFLAGS $DEFAULT_VECTCFLAGS + +# -ffast-math tests +set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS +lappend DEFAULT_VECTCFLAGS "-ffast-math" +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-fast-math-vect*.\[cS\]]] \ + "" $DEFAULT_VECTCFLAGS + +# Clean up. +set dg-do-what-default ${save-dg-do-what-default} + +# All done. +dg-finish diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 9ada7f1..ccb6356 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -2039,6 +2039,7 @@ proc check_effective_target_vect_int_mult { } { } else { set et_vect_int_mult_saved 0 if { [istarget powerpc*-*-*] + || [istarget spu-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] } { set et_vect_int_mult_saved 1 diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index 8a6e086..193c549 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -74,6 +74,34 @@ static void vect_update_inits_of_drs (loop_vec_info, tree); static int vect_min_worthwhile_factor (enum tree_code); +static int +cost_for_stmt (tree stmt) +{ + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + + switch (STMT_VINFO_TYPE (stmt_info)) + { + case load_vec_info_type: + return TARG_SCALAR_LOAD_COST; + case store_vec_info_type: + return TARG_SCALAR_STORE_COST; + case op_vec_info_type: + case condition_vec_info_type: + case assignment_vec_info_type: + case reduc_vec_info_type: + case induc_vec_info_type: + case type_promotion_vec_info_type: + case type_demotion_vec_info_type: + case type_conversion_vec_info_type: + case call_vec_info_type: + return TARG_SCALAR_STMT_COST; + case undef_vec_info_type: + 
default: + gcc_unreachable (); + } +} + + /* Function vect_estimate_min_profitable_iters Return the number of iterations required for the vector version of the @@ -138,7 +166,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) if (!STMT_VINFO_RELEVANT_P (stmt_info) && !STMT_VINFO_LIVE_P (stmt_info)) continue; - scalar_single_iter_cost++; + scalar_single_iter_cost += cost_for_stmt (stmt); vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info); vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info); } @@ -148,7 +176,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) loop. FORNOW: If we dont know the value of peel_iters for prologue or epilogue - at compile-time - we assume the worst. + at compile-time - we assume it's (vf-1)/2 (the worst would be vf-1). TODO: Build an expression that represents peel_iters for prologue and epilogue to be used in a run-time test. */ @@ -157,17 +185,17 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) if (byte_misalign < 0) { - peel_iters_prologue = vf - 1; + peel_iters_prologue = (vf - 1)/2; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "cost model: " - "prologue peel iters set conservatively."); + "prologue peel iters set to (vf-1)/2."); /* If peeling for alignment is unknown, loop bound of main loop becomes unknown. 
*/ - peel_iters_epilogue = vf - 1; + peel_iters_epilogue = (vf - 1)/2; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "cost model: " - "epilogue peel iters set conservatively because " + "epilogue peel iters set to (vf-1)/2 because " "peeling for alignment is unknown ."); } else @@ -186,10 +214,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) { - peel_iters_epilogue = vf - 1; + peel_iters_epilogue = (vf - 1)/2; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "cost model: " - "epilogue peel iters set conservatively because " + "epilogue peel iters set to (vf-1)/2 because " "loop iterations are unknown ."); } else @@ -229,6 +257,26 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost) + (peel_iters_epilogue * scalar_single_iter_cost); + /* Allow targets to add additional (outside-of-loop) costs. FORNOW, the only + information we provide for the target is whether testing against the + threshold involves a runtime test. */ + if (targetm.vectorize.builtin_vectorization_cost) + { + bool runtime_test = false; + + /* If the number of iterations is unknown, or the + peeling-for-misalignment amount is unknown, we will have to generate + a runtime test to test the loop count against the threshold. */ + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + || (byte_misalign < 0)) + runtime_test = true; + vec_outside_cost += + targetm.vectorize.builtin_vectorization_cost (runtime_test); + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "cost model : Adding target out-of-loop cost = %d", + targetm.vectorize.builtin_vectorization_cost (runtime_test)); + } + /* Calculate number of iterations required to make the vector version profitable, relative to the loop bodies only. 
The following condition must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where @@ -280,7 +328,14 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) min_profitable_iters < vf ? vf : min_profitable_iters); } - return min_profitable_iters < vf ? vf : min_profitable_iters; + min_profitable_iters = + min_profitable_iters < vf ? vf : min_profitable_iters; + + /* Because the condition we create is: + if (niters <= min_profitable_iters) + then skip the vectorized loop. */ + min_profitable_iters--; + return min_profitable_iters; } @@ -321,7 +376,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1)); /* Add in cost for initial definition. */ - outer_cost += TARG_VEC_STMT_COST; + outer_cost += TARG_SCALAR_TO_VEC_COST; /* Determine cost of epilogue code. @@ -341,11 +396,13 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, optab = optab_for_tree_code (code, vectype); /* We have a whole vector shift available. */ - if (!VECTOR_MODE_P (mode) - || optab->handlers[mode].insn_code == CODE_FOR_nothing) + if (VECTOR_MODE_P (mode) + && optab->handlers[mode].insn_code != CODE_FOR_nothing + && vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing) /* Final reduction via vector shifts and the reduction operator. Also requires scalar extract. */ - outer_cost += ((exact_log2(nelements) * 2 + 1) * TARG_VEC_STMT_COST); + outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST + + TARG_VEC_TO_SCALAR_COST); else /* Use extracts and reduction op for final reduction. For N elements, we have N extracts and N-1 reduction ops. */ diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index e5957ca..440bb3d 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -326,6 +326,21 @@ typedef struct _stmt_vec_info { #define TARG_COND_BRANCH_COST 3 #endif +/* Cost of any scalar operation, excluding load and store. 
*/ +#ifndef TARG_SCALAR_STMT_COST +#define TARG_SCALAR_STMT_COST 1 +#endif + +/* Cost of scalar load. */ +#ifndef TARG_SCALAR_LOAD_COST +#define TARG_SCALAR_LOAD_COST 1 +#endif + +/* Cost of scalar store. */ +#ifndef TARG_SCALAR_STORE_COST +#define TARG_SCALAR_STORE_COST 1 +#endif + /* Cost of any vector operation, excluding load, store or vector to scalar operation. */ #ifndef TARG_VEC_STMT_COST |