diff options
author | Kewen Lin <linkw@linux.ibm.com> | 2021-05-19 05:42:51 -0500 |
---|---|---|
committer | Kewen Lin <linkw@linux.ibm.com> | 2021-05-20 03:46:47 -0500 |
commit | 9c5bd1e9811babe255ddbbdcda1d00ea5997b826 (patch) | |
tree | 7d3ee49acadcfbc6c7a4cbb4c096f398db0538f7 /gcc | |
parent | 1a9b3f04c11eb467a8dc504a37dad57a371a0d4c (diff) | |
download | gcc-9c5bd1e9811babe255ddbbdcda1d00ea5997b826.zip gcc-9c5bd1e9811babe255ddbbdcda1d00ea5997b826.tar.gz gcc-9c5bd1e9811babe255ddbbdcda1d00ea5997b826.tar.bz2 |
vect: Replace hardcoded inner loop cost factor
This patch is to replace the current hardcoded weight factor
50, which is applied by the loop vectorizer to the cost of
statements in an inner loop relative to the loop being
vectorized, with one newly added member inner_loop_cost_factor
in loop vinfo. It also introduces one parameter
vect-inner-loop-cost-factor whose default value is 50, and
is used to initialize the inner_loop_cost_factor member.
The motivation here is that: if targets want to have one
unique function to gather some information in each add_stmt_cost
call, no matter that it's put before or after the cost tweaking
part for inner loop, it may have the need to adjust (expand or
shrink) the gathered data as the factor. Now the factor is
hardcoded, it's not easily maintained.
Bootstrapped/regtested on powerpc64le-linux-gnu P9,
x86_64-redhat-linux and aarch64-linux-gnu.
gcc/ChangeLog:
* doc/invoke.texi (vect-inner-loop-cost-factor): Document new
parameter.
* params.opt (vect-inner-loop-cost-factor): New.
* targhooks.c (default_add_stmt_cost): Replace hardcoded factor
50 with LOOP_VINFO_INNER_LOOP_COST_FACTOR, include head file
tree-vectorizer.h and its required ones.
* config/aarch64/aarch64.c (aarch64_add_stmt_cost): Replace
hardcoded factor 50 with LOOP_VINFO_INNER_LOOP_COST_FACTOR.
* config/arm/arm.c (arm_add_stmt_cost): Likewise.
* config/i386/i386.c (ix86_add_stmt_cost): Likewise.
* config/rs6000/rs6000.c (rs6000_add_stmt_cost): Likewise.
* tree-vect-loop.c (vect_compute_single_scalar_iteration_cost):
Likewise.
(_loop_vec_info::_loop_vec_info): Init inner_loop_cost_factor.
* tree-vectorizer.h (_loop_vec_info): Add inner_loop_cost_factor.
(LOOP_VINFO_INNER_LOOP_COST_FACTOR): New macro.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 5 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 6 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 6 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 6 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 5 | ||||
-rw-r--r-- | gcc/params.opt | 4 | ||||
-rw-r--r-- | gcc/targhooks.c | 9 | ||||
-rw-r--r-- | gcc/tree-vect-loop.c | 3 | ||||
-rw-r--r-- | gcc/tree-vectorizer.h | 5 |
9 files changed, 43 insertions, 6 deletions
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 0835646..c1e451e 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -15435,7 +15435,10 @@ aarch64_add_stmt_cost (class vec_info *vinfo, void *data, int count, arbitrary and could potentially be improved with analysis. */ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (vinfo, stmt_info)) - count *= 50; /* FIXME */ + { + gcc_assert (loop_vinfo); + count *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo); /* FIXME */ + } retval = (unsigned) (count * stmt_cost); costs->region[where] += retval; diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 28cfd81..caf4e56 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -12226,7 +12226,11 @@ arm_add_stmt_cost (vec_info *vinfo, void *data, int count, arbitrary and could potentially be improved with analysis. */ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (vinfo, stmt_info)) - count *= 50; /* FIXME. */ + { + loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo); + gcc_assert (loop_vinfo); + count *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo); /* FIXME. */ + } retval = (unsigned) (count * stmt_cost); cost[where] += retval; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 743d8a2..f3b4518 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -22482,7 +22482,11 @@ ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count, arbitrary and could potentially be improved with analysis. */ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (vinfo, stmt_info)) - count *= 50; /* FIXME. */ + { + loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo); + gcc_assert (loop_vinfo); + count *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo); /* FIXME. */ + } retval = (unsigned) (count * stmt_cost); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 6db450a..dfa517b 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -5364,7 +5364,11 @@ rs6000_add_stmt_cost (class vec_info *vinfo, void *data, int count, arbitrary and could potentially be improved with analysis. */ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (vinfo, stmt_info)) - count *= 50; /* FIXME. */ + { + loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo); + gcc_assert (loop_vinfo); + count *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo); /* FIXME. */ + } retval = (unsigned) (count * stmt_cost); cost_data->cost[where] += retval; diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 5e5fb16..49c74f3 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -14226,6 +14226,11 @@ code to iterate. 2 allows partial vector loads and stores in all loops. The parameter only has an effect on targets that support partial vector loads and stores. +@item vect-inner-loop-cost-factor +The factor which the loop vectorizer applies to the cost of statements +in an inner loop relative to the loop being vectorized. The default +value is 50. + @item avoid-fma-max-bits Maximum number of bits for which we avoid creating FMAs. diff --git a/gcc/params.opt b/gcc/params.opt index 2e4cbdd..82600b9 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -1089,4 +1089,8 @@ Bound on number of runtime checks inserted by the vectorizer's loop versioning f Common Joined UInteger Var(param_vect_partial_vector_usage) Init(2) IntegerRange(0, 2) Param Optimization Controls how loop vectorizer uses partial vectors. 0 means never, 1 means only for loops whose need to iterate can be removed, 2 means for all loops. The default value is 2. +-param=vect-inner-loop-cost-factor= +Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 999999) Param Optimization +The factor which the loop vectorizer applies to the cost of statements in an inner loop relative to the loop being vectorized. + ; This comment is to ensure we retain the blank line above. diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 1947ef2..2d2de04 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -90,6 +90,9 @@ along with GCC; see the file COPYING3. If not see #include "attribs.h" #include "asan.h" #include "emit-rtl.h" +#include "gimple.h" +#include "cfgloop.h" +#include "tree-vectorizer.h" bool default_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED, @@ -1480,7 +1483,11 @@ default_add_stmt_cost (class vec_info *vinfo, void *data, int count, arbitrary and could potentially be improved with analysis. */ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (vinfo, stmt_info)) - count *= 50; /* FIXME. */ + { + loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo); + gcc_assert (loop_vinfo); + count *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo); + } retval = (unsigned) (count * stmt_cost); cost[where] += retval; diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index f10e66a..ff7673d 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -836,6 +836,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) single_scalar_iteration_cost (0), vec_outside_cost (0), vec_inside_cost (0), + inner_loop_cost_factor (param_vect_inner_loop_cost_factor), vectorizable (false), can_use_partial_vectors_p (param_vect_partial_vector_usage != 0), using_partial_vectors_p (false), @@ -1237,7 +1238,7 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo) /* FORNOW. */ innerloop_iters = 1; if (loop->inner) - innerloop_iters = 50; /* FIXME */ + innerloop_iters = LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo); for (i = 0; i < nbbs; i++) { diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 8d1ffaf..7dcb4cd 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -689,6 +689,10 @@ public: /* The cost of the vector loop body. */ int vec_inside_cost; + /* The factor used to over weight those statements in an inner loop + relative to the loop being vectorized. */ + unsigned int inner_loop_cost_factor; + /* Is the loop vectorizable? */ bool vectorizable; @@ -807,6 +811,7 @@ public: #define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info #define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond +#define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor #define LOOP_VINFO_FULLY_MASKED_P(L) \ (LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \ |