aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXinliang David Li <davidxl@google.com>2013-09-27 16:48:15 +0000
committerXinliang David Li <davidxl@gcc.gnu.org>2013-09-27 16:48:15 +0000
commitd6d1127249564146429009e0682f25bd58d7a791 (patch)
treef6309abb88289dcaa19aca96778e0ef0659d91a6
parentac1857a3ef390df0c1b8faf745f855e8c5346b10 (diff)
downloadgcc-d6d1127249564146429009e0682f25bd58d7a791.zip
gcc-d6d1127249564146429009e0682f25bd58d7a791.tar.gz
gcc-d6d1127249564146429009e0682f25bd58d7a791.tar.bz2
vectorizer cost model enhancement
From-SVN: r202980
-rw-r--r--gcc/ChangeLog21
-rw-r--r--gcc/common.opt28
-rw-r--r--gcc/common/config/i386/i386-common.c1
-rw-r--r--gcc/config/i386/i386.c21
-rw-r--r--gcc/doc/invoke.texi35
-rw-r--r--gcc/flag-types.h9
-rw-r--r--gcc/opts.c16
-rw-r--r--gcc/targhooks.c23
-rw-r--r--gcc/tree-vect-data-refs.c20
-rw-r--r--gcc/tree-vect-loop.c2
-rw-r--r--gcc/tree-vect-slp.c2
-rw-r--r--gcc/tree-vectorizer.h8
12 files changed, 123 insertions, 63 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4b33045..9712bc8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,24 @@
+2013-09-27 Xinliang David Li <davidxl@google.com>
+
+ * opts.c (finish_options): Adjust parameters
+ according to vect cost model.
+ (common_handle_option): Set dynamic vect cost
+ model for FDO.
+ * targhooks.c (default_add_stmt_cost): Compute stmt cost
+ unconditionally.
+ * tree-vect-loop.c (vect_estimate_min_profitable_iters):
+ Use helper function.
+ * tree-vectorizer.h (unlimited_cost_model): New function.
+ * tree-vect-slp.c (vect_slp_analyze_bb_1): Use helper function.
+ * tree-vect-data-refs.c (vect_peeling_hash_insert): Use helper
+ function.
+ (vect_enhance_data_refs_alignment): Ditto.
+ * flag-types.h: New enum.
+ * common/config/i386/i386-common.c (ix86_option_init_struct):
+ No need to initialize vect_cost_model flag.
+ * config/i386/i386.c (ix86_add_stmt_cost): Compute stmt cost
+ unconditionally.
+
2013-09-27 Diego Novillo <dnovillo@google.com>
* gimple.h (enum ssa_mode): Remove.
diff --git a/gcc/common.opt b/gcc/common.opt
index 202e169..c2b3d35 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2278,13 +2278,33 @@ ftree-slp-vectorize
Common Report Var(flag_tree_slp_vectorize) Optimization
Enable basic block vectorization (SLP) on trees
+fvect-cost-model=
+Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT)
+Specifies the cost model for vectorization
+
+Enum
+Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
+
+EnumValue
+Enum(vect_cost_model) String(unlimited) Value(VECT_COST_MODEL_UNLIMITED)
+
+EnumValue
+Enum(vect_cost_model) String(dynamic) Value(VECT_COST_MODEL_DYNAMIC)
+
+EnumValue
+Enum(vect_cost_model) String(cheap) Value(VECT_COST_MODEL_CHEAP)
+
fvect-cost-model
-Common Report Var(flag_vect_cost_model) Optimization
-Enable use of cost model in vectorization
+Common RejectNegative Alias(fvect-cost-model=,dynamic)
+Enables the dynamic vectorizer cost model. Preserved for backward compatibility.
+
+fno-vect-cost-model
+Common RejectNegative Alias(fvect-cost-model=,unlimited)
+Enables the unlimited vectorizer cost model. Preserved for backward compatibility.
ftree-vect-loop-version
-Common Report Var(flag_tree_vect_loop_version) Init(1) Optimization
-Enable loop versioning when doing loop vectorization on trees
+Common Ignore
+Does nothing. Preserved for backward compatibility.
ftree-scev-cprop
Common Report Var(flag_tree_scev_cprop) Init(1) Optimization
diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index 8ca74b9..341637b 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -811,7 +811,6 @@ ix86_option_init_struct (struct gcc_options *opts)
opts->x_flag_pcc_struct_return = 2;
opts->x_flag_asynchronous_unwind_tables = 2;
- opts->x_flag_vect_cost_model = 1;
}
/* On the x86 -fsplit-stack and -fstack-protector both use the same
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index f10113f..21fc531 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -42782,20 +42782,17 @@ ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
unsigned *cost = (unsigned *) data;
unsigned retval = 0;
- if (flag_vect_cost_model)
- {
- tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
- int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+ int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
- /* Statements in an inner loop relative to the loop being
- vectorized are weighted more heavily. The value here is
- arbitrary and could potentially be improved with analysis. */
- if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
- count *= 50; /* FIXME. */
+ /* Statements in an inner loop relative to the loop being
+ vectorized are weighted more heavily. The value here is
+ arbitrary and could potentially be improved with analysis. */
+ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
+ count *= 50; /* FIXME. */
- retval = (unsigned) (count * stmt_cost);
- cost[where] += retval;
- }
+ retval = (unsigned) (count * stmt_cost);
+ cost[where] += retval;
return retval;
}
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 508bbb4..8bfd3db 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -423,7 +423,7 @@ Objective-C and Objective-C++ Dialects}.
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-partial-pre -ftree-pta @gol
-ftree-reassoc -ftree-sink -ftree-slsr -ftree-sra @gol
-ftree-switch-conversion -ftree-tail-merge -ftree-ter @gol
--ftree-vect-loop-version -ftree-vectorize -ftree-vrp @gol
+-ftree-vectorize -ftree-vrp @gol
-funit-at-a-time -funroll-all-loops -funroll-loops @gol
-funsafe-loop-optimizations -funsafe-math-optimizations -funswitch-loops @gol
-fvariable-expansion-in-unroller -fvect-cost-model -fvpt -fweb @gol
@@ -6770,7 +6770,7 @@ optimizations designed to reduce code size.
@option{-Os} disables the following optimization flags:
@gccoptlist{-falign-functions -falign-jumps -falign-loops @gol
-falign-labels -freorder-blocks -freorder-blocks-and-partition @gol
--fprefetch-loop-arrays -ftree-vect-loop-version}
+-fprefetch-loop-arrays}
@item -Ofast
@opindex Ofast
@@ -8025,19 +8025,20 @@ Perform loop vectorization on trees. This flag is enabled by default at
Perform basic block vectorization on trees. This flag is enabled by default at
@option{-O3} and when @option{-ftree-vectorize} is enabled.
-@item -ftree-vect-loop-version
-@opindex ftree-vect-loop-version
-Perform loop versioning when doing loop vectorization on trees. When a loop
-appears to be vectorizable except that data alignment or data dependence cannot
-be determined at compile time, then vectorized and non-vectorized versions of
-the loop are generated along with run-time checks for alignment or dependence
-to control which version is executed. This option is enabled by default
-except at level @option{-Os} where it is disabled.
-
-@item -fvect-cost-model
+@item -fvect-cost-model=@var{model}
@opindex fvect-cost-model
-Enable cost model for vectorization. This option is enabled by default at
-@option{-O3}.
+Alter the cost model used for vectorization. The @var{model} argument
+should be one of @code{unlimited}, @code{dynamic} or @code{cheap}.
+With the @code{unlimited} model the vectorized code-path is assumed
+to be profitable while with the @code{dynamic} model a runtime check
+will guard the vectorized code-path to enable it only for iteration
+counts that will likely execute faster than when executing the original
+scalar loop. The @code{cheap} model will disable vectorization of
+loops where doing so would be cost prohibitive, for example due to
+required runtime checks for data dependence or alignment, but otherwise
+is equal to the @code{dynamic} model.
+The default cost model depends on other optimization flags and is
+either @code{dynamic} or @code{cheap}.
@item -ftree-vrp
@opindex ftree-vrp
@@ -9443,13 +9444,11 @@ constraints. The default value is 0.
@item vect-max-version-for-alignment-checks
The maximum number of run-time checks that can be performed when
-doing loop versioning for alignment in the vectorizer. See option
-@option{-ftree-vect-loop-version} for more information.
+doing loop versioning for alignment in the vectorizer.
@item vect-max-version-for-alias-checks
The maximum number of run-time checks that can be performed when
-doing loop versioning for alias in the vectorizer. See option
-@option{-ftree-vect-loop-version} for more information.
+doing loop versioning for alias in the vectorizer.
@item vect-max-peeling-for-alignment
The maximum number of loop peels to enhance access alignment
diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index 45616bc..a2be8bb 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -191,6 +191,15 @@ enum fp_contract_mode {
FP_CONTRACT_FAST = 2
};
+/* Vectorizer cost-model, selected with -fvect-cost-model=. */
+enum vect_cost_model {
+ VECT_COST_MODEL_UNLIMITED = 0, /* Vectorized code-path is assumed profitable. */
+ VECT_COST_MODEL_CHEAP = 1, /* As dynamic, but no cost-prohibitive vectorization. */
+ VECT_COST_MODEL_DYNAMIC = 2, /* Runtime check guards the vectorized code-path. */
+ VECT_COST_MODEL_DEFAULT = 3 /* Init() value of the option in common.opt. */
+};
+
+
/* Different instrumentation modes. */
enum sanitize_code {
/* AddressSanitizer. */
diff --git a/gcc/opts.c b/gcc/opts.c
index 944834c..b1fadb1 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -486,6 +486,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_2_PLUS, OPT_falign_labels, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_falign_functions, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_tail_merge, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP },
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_foptimize_strlen, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fhoist_adjacent_loads, NULL, 1 },
@@ -500,7 +501,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 },
- { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model, NULL, 1 },
+ { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC },
{ OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 },
@@ -825,6 +826,17 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
}
}
+ /* Tune vectorization related parameters according to cost model. */
+ if (opts->x_flag_vect_cost_model == VECT_COST_MODEL_CHEAP)
+ {
+ maybe_set_param_value (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS,
+ 6, opts->x_param_values, opts_set->x_param_values);
+ maybe_set_param_value (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS,
+ 0, opts->x_param_values, opts_set->x_param_values);
+ maybe_set_param_value (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT,
+ 0, opts->x_param_values, opts_set->x_param_values);
+ }
+
/* Set PARAM_MAX_STORES_TO_SINK to 0 if either vectorization or if-conversion
is disabled. */
if ((!opts->x_flag_tree_loop_vectorize && !opts->x_flag_tree_slp_vectorize)
@@ -1669,7 +1681,7 @@ common_handle_option (struct gcc_options *opts,
&& !opts_set->x_flag_tree_vectorize)
opts->x_flag_tree_slp_vectorize = value;
if (!opts_set->x_flag_vect_cost_model)
- opts->x_flag_vect_cost_model = value;
+ opts->x_flag_vect_cost_model = VECT_COST_MODEL_DYNAMIC;
if (!opts_set->x_flag_tree_loop_distribute_patterns)
opts->x_flag_tree_loop_distribute_patterns = value;
/* Indirect call profiling should do all useful transformations
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 03db7b4..798aacf 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1057,20 +1057,17 @@ default_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
unsigned *cost = (unsigned *) data;
unsigned retval = 0;
- if (flag_vect_cost_model)
- {
- tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
- int stmt_cost = default_builtin_vectorization_cost (kind, vectype,
+ tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+ int stmt_cost = default_builtin_vectorization_cost (kind, vectype,
misalign);
- /* Statements in an inner loop relative to the loop being
- vectorized are weighted more heavily. The value here is
- arbitrary and could potentially be improved with analysis. */
- if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
- count *= 50; /* FIXME. */
-
- retval = (unsigned) (count * stmt_cost);
- cost[where] += retval;
- }
+ /* Statements in an inner loop relative to the loop being
+ vectorized are weighted more heavily. The value here is
+ arbitrary and could potentially be improved with analysis. */
+ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
+ count *= 50; /* FIXME. */
+
+ retval = (unsigned) (count * stmt_cost);
+ cost[where] += retval;
return retval;
}
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index c8cdcb8..b8988d9 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -1115,7 +1115,7 @@ vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
*new_slot = slot;
}
- if (!supportable_dr_alignment && !flag_vect_cost_model)
+ if (!supportable_dr_alignment && unlimited_cost_model ())
slot->count += VECT_MAX_COST;
}
@@ -1225,7 +1225,7 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
res.peel_info.dr = NULL;
res.body_cost_vec = stmt_vector_for_cost();
- if (flag_vect_cost_model)
+ if (!unlimited_cost_model ())
{
res.inside_cost = INT_MAX;
res.outside_cost = INT_MAX;
@@ -1454,7 +1454,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
vectorization factor.
We do this automatically for cost model, since we calculate cost
for every peeling option. */
- if (!flag_vect_cost_model)
+ if (unlimited_cost_model ())
possible_npeel_number = vf /nelements;
/* Handle the aligned case. We may decide to align some other
@@ -1462,7 +1462,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
if (DR_MISALIGNMENT (dr) == 0)
{
npeel_tmp = 0;
- if (!flag_vect_cost_model)
+ if (unlimited_cost_model ())
possible_npeel_number++;
}
@@ -1795,16 +1795,14 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
/* (2) Versioning to force alignment. */
/* Try versioning if:
- 1) flag_tree_vect_loop_version is TRUE
- 2) optimize loop for speed
- 3) there is at least one unsupported misaligned data ref with an unknown
+ 1) optimize loop for speed
+ 2) there is at least one unsupported misaligned data ref with an unknown
misalignment, and
- 4) all misaligned data refs with a known misalignment are supported, and
- 5) the number of runtime alignment checks is within reason. */
+ 3) all misaligned data refs with a known misalignment are supported, and
+ 4) the number of runtime alignment checks is within reason. */
do_versioning =
- flag_tree_vect_loop_version
- && optimize_loop_nest_for_speed_p (loop)
+ optimize_loop_nest_for_speed_p (loop)
&& (!loop->inner); /* FORNOW */
if (do_versioning)
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 072d44e..baea8b7 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2680,7 +2680,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
/* Cost model disabled. */
- if (!flag_vect_cost_model)
+ if (unlimited_cost_model ())
{
dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
*ret_min_profitable_niters = 0;
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 8ed0fc5..b3b3abe 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2168,7 +2168,7 @@ vect_slp_analyze_bb_1 (basic_block bb)
}
/* Cost model: check if the vectorization is worthwhile. */
- if (flag_vect_cost_model
+ if (!unlimited_cost_model ()
&& !vect_bb_vectorization_profitable_p (bb_vinfo))
{
if (dump_enabled_p ())
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 9c7753e..7cb8f4d 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -880,6 +880,14 @@ known_alignment_for_access_p (struct data_reference *data_ref_info)
return (DR_MISALIGNMENT (data_ref_info) != -1);
}
+
+/* Return true if flag_vect_cost_model is VECT_COST_MODEL_UNLIMITED, i.e. the cost model is disabled. */
+static inline bool
+unlimited_cost_model ()
+{
+ return flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED;
+}
+
/* Source location */
extern LOC vect_location;