diff options
author | Ira Rosen <irar@il.ibm.com> | 2010-06-07 09:12:32 +0000 |
---|---|---|
committer | Ira Rosen <irar@gcc.gnu.org> | 2010-06-07 09:12:32 +0000 |
commit | 35e1a5e7cf85b08634a46b08e76d28ced021aff9 (patch) | |
tree | 8795ce881dcce2d97198c58aa7cb9c8a103a4f24 | |
parent | 81c566c2fa32ad31b8b22f7ada161778150e51d1 (diff) | |
download | gcc-35e1a5e7cf85b08634a46b08e76d28ced021aff9.zip gcc-35e1a5e7cf85b08634a46b08e76d28ced021aff9.tar.gz gcc-35e1a5e7cf85b08634a46b08e76d28ced021aff9.tar.bz2 |
tm.texi (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Update documentation.
* doc/tm.texi (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Update
documentation.
* targhooks.c (default_builtin_vectorization_cost): New function.
* targhooks.h (default_builtin_vectorization_cost): Declare.
* target.h (enum vect_cost_for_stmt): Define.
(builtin_vectorization_cost): Change argument and comment.
* tree-vectorizer.h: Remove cost model macros.
* tree-vect-loop.c: Include target.h.
(vect_get_cost): New function.
(vect_estimate_min_profitable_iters): Replace cost model macros with
calls to vect_get_cost.
(vect_model_reduction_cost, vect_model_induction_cost): Likewise.
* target-def.h (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Add
default implementation.
* tree-vect-stmts.c (cost_for_stmt): Replace cost model macros with
calls to target hook builtin_vectorization_cost.
(vect_model_simple_cost, vect_model_store_cost, vect_model_load_cost):
Likewise.
* Makefile.in (tree-vect-loop.o): Add dependency on TARGET_H.
* config/spu/spu.c (spu_builtin_vectorization_cost): Replace with new
implementation to return costs.
* config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise.
* config/spu/spu.h: Remove vectorizer cost model macros.
* config/i386/i386.h: Likewise.
* tree-vect-slp.c (vect_build_slp_tree): Replace cost model macro with
a call to target hook builtin_vectorization_cost.
From-SVN: r160360
-rw-r--r-- | gcc/ChangeLog | 29 |
-rw-r--r-- | gcc/Makefile.in | 2 |
-rw-r--r-- | gcc/config/i386/i386.c | 56 |
-rw-r--r-- | gcc/config/i386/i386.h | 51 |
-rw-r--r-- | gcc/config/spu/spu.c | 43 |
-rw-r--r-- | gcc/config/spu/spu.h | 51 |
-rw-r--r-- | gcc/doc/tm.texi | 5 |
-rw-r--r-- | gcc/target-def.h | 3 |
-rw-r--r-- | gcc/target.h | 23 |
-rw-r--r-- | gcc/targhooks.c | 30 |
-rw-r--r-- | gcc/targhooks.h | 2 |
-rw-r--r-- | gcc/tree-vect-loop.c | 49 |
-rw-r--r-- | gcc/tree-vect-slp.c | 4 |
-rw-r--r-- | gcc/tree-vect-stmts.c | 46 |
-rw-r--r-- | gcc/tree-vectorizer.h | 64 |
15 files changed, 221 insertions, 237 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e8b6179..550bb70 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,32 @@ +2010-06-07 Ira Rosen <irar@il.ibm.com> + + * doc/tm.texi (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Update + documentation. + * targhooks.c (default_builtin_vectorization_cost): New function. + * targhooks.h (default_builtin_vectorization_cost): Declare. + * target.h (enum vect_cost_for_stmt): Define. + (builtin_vectorization_cost): Change argument and comment. + * tree-vectorizer.h: Remove cost model macros. + * tree-vect-loop.c: Include target.h. + (vect_get_cost): New function. + (vect_estimate_min_profitable_iters): Replace cost model macros with + calls to vect_get_cost. + (vect_model_reduction_cost, vect_model_induction_cost): Likewise. + * target-def.h (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Add + default implementation. + * tree-vect-stmts.c (cost_for_stmt): Replace cost model macros with + calls to target hook builtin_vectorization_cost. + (vect_model_simple_cost, vect_model_store_cost, vect_model_load_cost): + Likewise. + * Makefile.in (tree-vect-loop.o): Add dependency on TARGET_H. + * config/spu/spu.c (spu_builtin_vectorization_cost): Replace with new + implementation to return costs. + * config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise. + * config/spu/spu.h: Remove vectorizer cost model macros. + * config/i386/i386.h: Likewise. + * tree-vect-slp.c (vect_build_slp_tree): Replace cost model macro with + a call to target hook builtin_vectorization_cost. 
+ 2010-06-06 Sriraman Tallam <tmsriram@google.com> PR target/44319 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 8d9b808..fed7fe8 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2704,7 +2704,7 @@ tree-vect-loop.o: tree-vect-loop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(GGC_H) $(TREE_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) \ $(TREE_DUMP_H) $(CFGLOOP_H) $(CFGLAYOUT_H) $(EXPR_H) $(RECOG_H) $(OPTABS_H) \ $(TOPLEV_H) $(SCEV_H) $(TREE_VECTORIZER_H) tree-pretty-print.h \ - gimple-pretty-print.h + gimple-pretty-print.h $(TARGET_H) tree-vect-loop-manip.o: tree-vect-loop-manip.c $(CONFIG_H) $(SYSTEM_H) \ coretypes.h $(TM_H) $(GGC_H) $(TREE_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \ $(TREE_FLOW_H) $(TREE_DUMP_H) $(CFGLOOP_H) $(CFGLAYOUT_H) $(EXPR_H) $(TOPLEV_H) \ diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 8ae0c24..407238f 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -29296,28 +29296,52 @@ static const struct attribute_spec ix86_attribute_table[] = /* Implement targetm.vectorize.builtin_vectorization_cost. */ static int -ix86_builtin_vectorization_cost (bool runtime_test) +ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost) { - /* If the branch of the runtime test is taken - i.e. - the vectorized - version is skipped - this incurs a misprediction cost (because the - vectorized version is expected to be the fall-through). So we subtract - the latency of a mispredicted branch from the costs that are incured - when the vectorized version is executed. + switch (type_of_cost) + { + case scalar_stmt: + return ix86_cost->scalar_stmt_cost; - TODO: The values in individual target tables have to be tuned or new - fields may be needed. For eg. on K8, the default branch path is the - not-taken path. If the taken path is predicted correctly, the minimum - penalty of going down the taken-path is 1 cycle. If the taken-path is - not predicted correctly, then the minimum penalty is 10 cycles. 
*/ + case scalar_load: + return ix86_cost->scalar_load_cost; - if (runtime_test) - { - return (-(ix86_cost->cond_taken_branch_cost)); + case scalar_store: + return ix86_cost->scalar_store_cost; + + case vector_stmt: + return ix86_cost->vec_stmt_cost; + + case vector_load: + return ix86_cost->vec_align_load_cost; + + case vector_store: + return ix86_cost->vec_store_cost; + + case vec_to_scalar: + return ix86_cost->vec_to_scalar_cost; + + case scalar_to_vec: + return ix86_cost->scalar_to_vec_cost; + + case unaligned_load: + return ix86_cost->vec_unalign_load_cost; + + case cond_branch_taken: + return ix86_cost->cond_taken_branch_cost; + + case cond_branch_not_taken: + return ix86_cost->cond_not_taken_branch_cost; + + case vec_perm: + return 1; + + default: + gcc_unreachable (); } - else - return 0; } + /* Implement targetm.vectorize.builtin_vec_perm. */ static tree diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index c3fc0e4..226f784 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2420,57 +2420,6 @@ struct GTY(()) machine_function { #define SYMBOL_REF_DLLEXPORT_P(X) \ ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_DLLEXPORT) != 0) -/* Model costs for vectorizer. */ - -/* Cost of conditional branch. */ -#undef TARG_COND_BRANCH_COST -#define TARG_COND_BRANCH_COST ix86_cost->branch_cost - -/* Cost of any scalar operation, excluding load and store. */ -#undef TARG_SCALAR_STMT_COST -#define TARG_SCALAR_STMT_COST ix86_cost->scalar_stmt_cost - -/* Cost of scalar load. */ -#undef TARG_SCALAR_LOAD_COST -#define TARG_SCALAR_LOAD_COST ix86_cost->scalar_load_cost - -/* Cost of scalar store. */ -#undef TARG_SCALAR_STORE_COST -#define TARG_SCALAR_STORE_COST ix86_cost->scalar_store_cost - -/* Cost of any vector operation, excluding load, store or vector to scalar - operation. */ -#undef TARG_VEC_STMT_COST -#define TARG_VEC_STMT_COST ix86_cost->vec_stmt_cost - -/* Cost of vector to scalar operation. 
*/ -#undef TARG_VEC_TO_SCALAR_COST -#define TARG_VEC_TO_SCALAR_COST ix86_cost->vec_to_scalar_cost - -/* Cost of scalar to vector operation. */ -#undef TARG_SCALAR_TO_VEC_COST -#define TARG_SCALAR_TO_VEC_COST ix86_cost->scalar_to_vec_cost - -/* Cost of aligned vector load. */ -#undef TARG_VEC_LOAD_COST -#define TARG_VEC_LOAD_COST ix86_cost->vec_align_load_cost - -/* Cost of misaligned vector load. */ -#undef TARG_VEC_UNALIGNED_LOAD_COST -#define TARG_VEC_UNALIGNED_LOAD_COST ix86_cost->vec_unalign_load_cost - -/* Cost of vector store. */ -#undef TARG_VEC_STORE_COST -#define TARG_VEC_STORE_COST ix86_cost->vec_store_cost - -/* Cost of conditional taken branch for vectorizer cost model. */ -#undef TARG_COND_TAKEN_BRANCH_COST -#define TARG_COND_TAKEN_BRANCH_COST ix86_cost->cond_taken_branch_cost - -/* Cost of conditional not taken branch for vectorizer cost model. */ -#undef TARG_COND_NOT_TAKEN_BRANCH_COST -#define TARG_COND_NOT_TAKEN_BRANCH_COST ix86_cost->cond_not_taken_branch_cost - /* Local variables: version-control: t diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c index 0eff2cc..f8dd305 100644 --- a/gcc/config/spu/spu.c +++ b/gcc/config/spu/spu.c @@ -209,7 +209,7 @@ static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode, static tree spu_builtin_mul_widen_even (tree); static tree spu_builtin_mul_widen_odd (tree); static tree spu_builtin_mask_for_load (void); -static int spu_builtin_vectorization_cost (bool); +static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt); static bool spu_vector_alignment_reachable (const_tree, bool); static tree spu_builtin_vec_perm (tree, tree *); static enum machine_mode spu_addr_space_pointer_mode (addr_space_t); @@ -6695,17 +6695,36 @@ spu_builtin_mask_for_load (void) /* Implement targetm.vectorize.builtin_vectorization_cost. */ static int -spu_builtin_vectorization_cost (bool runtime_test) -{ - /* If the branch of the runtime test is taken - i.e. 
- the vectorized - version is skipped - this incurs a misprediction cost (because the - vectorized version is expected to be the fall-through). So we subtract - the latency of a mispredicted branch from the costs that are incurred - when the vectorized version is executed. */ - if (runtime_test) - return -19; - else - return 0; +spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost) +{ + switch (type_of_cost) + { + case scalar_stmt: + case vector_stmt: + case vector_load: + case vector_store: + case vec_to_scalar: + case scalar_to_vec: + case cond_branch_not_taken: + case vec_perm: + return 1; + + case scalar_store: + return 10; + + case scalar_load: + /* Load + rotate. */ + return 2; + + case unaligned_load: + return 2; + + case cond_branch_taken: + return 6; + + default: + gcc_unreachable (); + } } /* Return true iff, data reference of TYPE can reach vector alignment (16) diff --git a/gcc/config/spu/spu.h b/gcc/config/spu/spu.h index c8b0e12..54b4612 100644 --- a/gcc/config/spu/spu.h +++ b/gcc/config/spu/spu.h @@ -524,57 +524,6 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \ do { if (LOG!=0) fprintf (FILE, "\t.align\t%d\n", (LOG)); } while (0) -/* Model costs for the vectorizer. */ - -/* Cost of conditional branch. */ -#ifndef TARG_COND_BRANCH_COST -#define TARG_COND_BRANCH_COST 6 -#endif - -/* Cost of any scalar operation, excluding load and store. */ -#ifndef TARG_SCALAR_STMT_COST -#define TARG_SCALAR_STMT_COST 1 -#endif - -/* Cost of scalar load. */ -#undef TARG_SCALAR_LOAD_COST -#define TARG_SCALAR_LOAD_COST 2 /* load + rotate */ - -/* Cost of scalar store. */ -#undef TARG_SCALAR_STORE_COST -#define TARG_SCALAR_STORE_COST 10 - -/* Cost of any vector operation, excluding load, store, - or vector to scalar operation. */ -#undef TARG_VEC_STMT_COST -#define TARG_VEC_STMT_COST 1 - -/* Cost of vector to scalar operation. 
*/ -#undef TARG_VEC_TO_SCALAR_COST -#define TARG_VEC_TO_SCALAR_COST 1 - -/* Cost of scalar to vector operation. */ -#undef TARG_SCALAR_TO_VEC_COST -#define TARG_SCALAR_TO_VEC_COST 1 - -/* Cost of aligned vector load. */ -#undef TARG_VEC_LOAD_COST -#define TARG_VEC_LOAD_COST 1 - -/* Cost of misaligned vector load. */ -#undef TARG_VEC_UNALIGNED_LOAD_COST -#define TARG_VEC_UNALIGNED_LOAD_COST 2 - -/* Cost of vector store. */ -#undef TARG_VEC_STORE_COST -#define TARG_VEC_STORE_COST 1 - -/* Cost of vector permutation. */ -#ifndef TARG_VEC_PERMUTE_COST -#define TARG_VEC_PERMUTE_COST 1 -#endif - - /* Misc */ #define CASE_VECTOR_MODE SImode diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 16a30a8..34f94c6 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5742,9 +5742,8 @@ preserved (e.g.@: used only by a reduction computation). Otherwise, the @code{widen_mult_hi/lo} idioms will be used. @end deftypefn -@deftypefn {Target Hook} int TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST (bool @var{runtime_test}) -Returns the cost to be added to the overhead involved with executing -the vectorized version of a loop. +@deftypefn {Target Hook} int TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST (enum vect_cost_for_stmt @var{type_of_cost}) +Returns cost of different scalar or vector statements for vectorization cost model. 
@end deftypefn @deftypefn {Target Hook} bool TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE (const_tree @var{type}, bool @var{is_packed}) diff --git a/gcc/target-def.h b/gcc/target-def.h index 5a088bc..91120ba 100644 --- a/gcc/target-def.h +++ b/gcc/target-def.h @@ -417,7 +417,8 @@ default_builtin_vectorized_conversion #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0 -#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 0 +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + default_builtin_vectorization_cost #define TARGET_VECTOR_ALIGNMENT_REACHABLE \ default_builtin_vector_alignment_reachable #define TARGET_VECTORIZE_BUILTIN_VEC_PERM 0 diff --git a/gcc/target.h b/gcc/target.h index 4e2bf98..e209cfe 100644 --- a/gcc/target.h +++ b/gcc/target.h @@ -110,6 +110,23 @@ struct asm_int_op const char *ti; }; +/* Types of costs for vectorizer cost model. */ +enum vect_cost_for_stmt +{ + scalar_stmt, + scalar_load, + scalar_store, + vector_stmt, + vector_load, + unaligned_load, + vector_store, + vec_to_scalar, + scalar_to_vec, + cond_branch_not_taken, + cond_branch_taken, + vec_perm +}; + /* The target structure. This holds all the backend hooks. */ struct gcc_target @@ -505,9 +522,9 @@ struct gcc_target tree (* builtin_mul_widen_even) (tree); tree (* builtin_mul_widen_odd) (tree); - /* Returns the cost to be added to the overheads involved with - executing the vectorized version of a loop. */ - int (*builtin_vectorization_cost) (bool); + /* Cost of different vector/scalar statements in vectorization cost + model. */ + int (* builtin_vectorization_cost) (enum vect_cost_for_stmt); /* Return true if vector alignment is reachable (by peeling N iterations) for the given type. 
*/ diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 3dccae2..821b83f 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -476,6 +476,36 @@ default_builtin_vectorized_conversion (unsigned int code ATTRIBUTE_UNUSED, return NULL_TREE; } +/* Default vectorizer cost model values. */ + +int +default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost) +{ + switch (type_of_cost) + { + case scalar_stmt: + case scalar_load: + case scalar_store: + case vector_stmt: + case vector_load: + case vector_store: + case vec_to_scalar: + case scalar_to_vec: + case cond_branch_not_taken: + case vec_perm: + return 1; + + case unaligned_load: + return 2; + + case cond_branch_taken: + return 3; + + default: + gcc_unreachable (); + } +} + /* Reciprocal. */ tree diff --git a/gcc/targhooks.h b/gcc/targhooks.h index efc8a98..6e71445 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -77,6 +77,8 @@ extern tree default_builtin_vectorized_function (tree, tree, tree); extern tree default_builtin_vectorized_conversion (unsigned int, tree, tree); +extern int default_builtin_vectorization_cost (enum vect_cost_for_stmt); + extern tree default_builtin_reciprocal (unsigned int, bool, bool); extern bool default_builtin_vector_alignment_reachable (const_tree, bool); diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index a6b331a..ccddab3 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-chrec.h" #include "tree-scalar-evolution.h" #include "tree-vectorizer.h" +#include "target.h" /* Loop Vectorization Pass. @@ -1116,6 +1117,15 @@ vect_analyze_loop_form (struct loop *loop) } +/* Get cost by calling cost target builtin. */ + +static inline +int vect_get_cost (enum vect_cost_for_stmt type_of_cost) +{ + return targetm.vectorize.builtin_vectorization_cost (type_of_cost); +} + + /* Function vect_analyze_loop_operations. Scan the loop stmts and make sure they are all vectorizable. 
*/ @@ -2056,7 +2066,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo) || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) - vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST; + vec_outside_cost += vect_get_cost (cond_branch_taken); /* Count statements in scalar loop. Using this as scalar cost for a single iteration for now. @@ -2125,8 +2135,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) branch per peeled loop. Even if scalar loop iterations are known, vector iterations are not known since peeled prologue iterations are not known. Hence guards remain the same. */ - peel_guard_costs += 2 * (TARG_COND_TAKEN_BRANCH_COST - + TARG_COND_NOT_TAKEN_BRANCH_COST); + peel_guard_costs += 2 * (vect_get_cost (cond_branch_taken) + + vect_get_cost (cond_branch_not_taken)); } else { @@ -2152,8 +2162,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) /* If peeled iterations are known but number of scalar loop iterations are unknown, count a taken branch per peeled loop. */ - peel_guard_costs += 2 * TARG_COND_TAKEN_BRANCH_COST; - + peel_guard_costs += 2 * vect_get_cost (cond_branch_taken); } else { @@ -2228,16 +2237,16 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) /* Cost model check occurs at versioning. */ if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo) || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) - scalar_outside_cost += TARG_COND_NOT_TAKEN_BRANCH_COST; + scalar_outside_cost += vect_get_cost (cond_branch_not_taken); else { /* Cost model check occurs at prologue generation. */ if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) - scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST - + TARG_COND_NOT_TAKEN_BRANCH_COST; + scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken) + + vect_get_cost (cond_branch_not_taken); /* Cost model check occurs at epilogue generation. 
*/ else - scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST; + scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken); } } @@ -2347,7 +2356,8 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, /* Cost of reduction op inside loop. */ - STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST; + STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) + += ncopies * vect_get_cost (vector_stmt); stmt = STMT_VINFO_STMT (stmt_info); @@ -2387,7 +2397,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, code = gimple_assign_rhs_code (orig_stmt); /* Add in cost for initial definition. */ - outer_cost += TARG_SCALAR_TO_VEC_COST; + outer_cost += vect_get_cost (scalar_to_vec); /* Determine cost of epilogue code. @@ -2397,7 +2407,8 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, if (!nested_in_vect_loop_p (loop, orig_stmt)) { if (reduc_code != ERROR_MARK) - outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST; + outer_cost += vect_get_cost (vector_stmt) + + vect_get_cost (vec_to_scalar); else { int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1); @@ -2414,12 +2425,14 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, && optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing) /* Final reduction via vector shifts and the reduction operator. Also requires scalar extract. */ - outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST - + TARG_VEC_TO_SCALAR_COST); + outer_cost += ((exact_log2(nelements) * 2) + * vect_get_cost (vector_stmt) + + vect_get_cost (vec_to_scalar)); else /* Use extracts and reduction op for final reduction. For N elements, we have N extracts and N-1 reduction ops. 
*/ - outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST); + outer_cost += ((nelements + nelements - 1) + * vect_get_cost (vector_stmt)); } } @@ -2442,9 +2455,11 @@ static void vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies) { /* loop cost for vec_loop. */ - STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST; + STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) + = ncopies * vect_get_cost (vector_stmt); /* prologue cost for vec_init and vec_step. */ - STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST; + STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) + = 2 * vect_get_cost (scalar_to_vec); if (vect_print_dump_info (REPORT_COST)) fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, " diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 9dd284d..41c01b9 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -645,7 +645,9 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, if (permutation) { VEC_safe_push (slp_tree, heap, *loads, *node); - *inside_cost += TARG_VEC_PERMUTE_COST * group_size; + *inside_cost + += targetm.vectorize.builtin_vectorization_cost (vec_perm) + * group_size; } return true; diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 7ad0988..eded879 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -553,9 +553,9 @@ cost_for_stmt (gimple stmt) switch (STMT_VINFO_TYPE (stmt_info)) { case load_vec_info_type: - return TARG_SCALAR_LOAD_COST; + return targetm.vectorize.builtin_vectorization_cost (scalar_load); case store_vec_info_type: - return TARG_SCALAR_STORE_COST; + return targetm.vectorize.builtin_vectorization_cost (scalar_store); case op_vec_info_type: case condition_vec_info_type: case assignment_vec_info_type: @@ -565,7 +565,7 @@ cost_for_stmt (gimple stmt) case type_demotion_vec_info_type: case type_conversion_vec_info_type: case call_vec_info_type: - return TARG_SCALAR_STMT_COST; + return 
targetm.vectorize.builtin_vectorization_cost (scalar_stmt); case undef_vec_info_type: default: gcc_unreachable (); @@ -589,13 +589,15 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, if (PURE_SLP_STMT (stmt_info)) return; - inside_cost = ncopies * TARG_VEC_STMT_COST; + inside_cost = ncopies + * targetm.vectorize.builtin_vectorization_cost (vector_stmt); /* FORNOW: Assuming maximum 2 args per stmts. */ for (i = 0; i < 2; i++) { if (dt[i] == vect_constant_def || dt[i] == vect_external_def) - outside_cost += TARG_SCALAR_TO_VEC_COST; + outside_cost + += targetm.vectorize.builtin_vectorization_cost (vector_stmt); } if (vect_print_dump_info (REPORT_COST)) @@ -643,7 +645,8 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, return; if (dt == vect_constant_def || dt == vect_external_def) - outside_cost = TARG_SCALAR_TO_VEC_COST; + outside_cost + = targetm.vectorize.builtin_vectorization_cost (scalar_to_vec); /* Strided access? */ if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node) @@ -658,7 +661,7 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, { /* Uses a high and low interleave operation for each needed permute. */ inside_cost = ncopies * exact_log2(group_size) * group_size - * TARG_VEC_STMT_COST; + * targetm.vectorize.builtin_vectorization_cost (vector_stmt); if (vect_print_dump_info (REPORT_COST)) fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .", @@ -667,7 +670,8 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, } /* Costs of the stores. */ - inside_cost += ncopies * TARG_VEC_STORE_COST; + inside_cost += ncopies + * targetm.vectorize.builtin_vectorization_cost (vector_store); if (vect_print_dump_info (REPORT_COST)) fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, " @@ -722,7 +726,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node) { /* Uses an even and odd extract operations for each needed permute. 
*/ inside_cost = ncopies * exact_log2(group_size) * group_size - * TARG_VEC_STMT_COST; + * targetm.vectorize.builtin_vectorization_cost (vector_stmt); if (vect_print_dump_info (REPORT_COST)) fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .", @@ -735,7 +739,8 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node) { case dr_aligned: { - inside_cost += ncopies * TARG_VEC_LOAD_COST; + inside_cost += ncopies + * targetm.vectorize.builtin_vectorization_cost (vector_load); if (vect_print_dump_info (REPORT_COST)) fprintf (vect_dump, "vect_model_load_cost: aligned."); @@ -745,7 +750,8 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node) case dr_unaligned_supported: { /* Here, we assign an additional cost for the unaligned load. */ - inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST; + inside_cost += ncopies + * targetm.vectorize.builtin_vectorization_cost (unaligned_load); if (vect_print_dump_info (REPORT_COST)) fprintf (vect_dump, "vect_model_load_cost: unaligned supported by " @@ -755,13 +761,16 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node) } case dr_explicit_realign: { - inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST); + inside_cost += ncopies * (2 + * targetm.vectorize.builtin_vectorization_cost (vector_load) + + targetm.vectorize.builtin_vectorization_cost (vector_stmt)); /* FIXME: If the misalignment remains fixed across the iterations of the containing loop, the following cost should be added to the outside costs. 
*/ if (targetm.vectorize.builtin_mask_for_load) - inside_cost += TARG_VEC_STMT_COST; + inside_cost + += targetm.vectorize.builtin_vectorization_cost (vector_stmt); break; } @@ -780,13 +789,16 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node) if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node) { - outside_cost = 2*TARG_VEC_STMT_COST; + outside_cost = 2 + * targetm.vectorize.builtin_vectorization_cost (vector_stmt); if (targetm.vectorize.builtin_mask_for_load) - outside_cost += TARG_VEC_STMT_COST; + outside_cost + += targetm.vectorize.builtin_vectorization_cost (vector_stmt); } - inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST); - + inside_cost += ncopies + * (targetm.vectorize.builtin_vectorization_cost (vector_load) + + targetm.vectorize.builtin_vectorization_cost (vector_stmt)); break; } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 79fe6ab..11795d8 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -543,70 +543,6 @@ typedef struct _stmt_vec_info { #define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp) #define STMT_SLP_TYPE(S) (S)->slp_type -/* These are some defines for the initial implementation of the vectorizer's - cost model. These will later be target specific hooks. */ - -/* Cost of conditional taken branch. */ -#ifndef TARG_COND_TAKEN_BRANCH_COST -#define TARG_COND_TAKEN_BRANCH_COST 3 -#endif - -/* Cost of conditional not taken branch. */ -#ifndef TARG_COND_NOT_TAKEN_BRANCH_COST -#define TARG_COND_NOT_TAKEN_BRANCH_COST 1 -#endif - -/* Cost of any scalar operation, excluding load and store. */ -#ifndef TARG_SCALAR_STMT_COST -#define TARG_SCALAR_STMT_COST 1 -#endif - -/* Cost of scalar load. */ -#ifndef TARG_SCALAR_LOAD_COST -#define TARG_SCALAR_LOAD_COST 1 -#endif - -/* Cost of scalar store. */ -#ifndef TARG_SCALAR_STORE_COST -#define TARG_SCALAR_STORE_COST 1 -#endif - -/* Cost of any vector operation, excluding load, store or vector to scalar - operation. 
*/ -#ifndef TARG_VEC_STMT_COST -#define TARG_VEC_STMT_COST 1 -#endif - -/* Cost of vector to scalar operation. */ -#ifndef TARG_VEC_TO_SCALAR_COST -#define TARG_VEC_TO_SCALAR_COST 1 -#endif - -/* Cost of scalar to vector operation. */ -#ifndef TARG_SCALAR_TO_VEC_COST -#define TARG_SCALAR_TO_VEC_COST 1 -#endif - -/* Cost of aligned vector load. */ -#ifndef TARG_VEC_LOAD_COST -#define TARG_VEC_LOAD_COST 1 -#endif - -/* Cost of misaligned vector load. */ -#ifndef TARG_VEC_UNALIGNED_LOAD_COST -#define TARG_VEC_UNALIGNED_LOAD_COST 2 -#endif - -/* Cost of vector store. */ -#ifndef TARG_VEC_STORE_COST -#define TARG_VEC_STORE_COST 1 -#endif - -/* Cost of vector permutation. */ -#ifndef TARG_VEC_PERMUTE_COST -#define TARG_VEC_PERMUTE_COST 1 -#endif - /* The maximum number of intermediate steps required in multi-step type conversion. */ #define MAX_INTERM_CVT_STEPS 3 |