aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kewen Lin <linkw@linux.ibm.com> 2021-05-19 05:42:51 -0500
committer Kewen Lin <linkw@linux.ibm.com> 2021-05-20 03:46:47 -0500
commit 9c5bd1e9811babe255ddbbdcda1d00ea5997b826 (patch)
tree 7d3ee49acadcfbc6c7a4cbb4c096f398db0538f7
parent 1a9b3f04c11eb467a8dc504a37dad57a371a0d4c (diff)
download gcc-9c5bd1e9811babe255ddbbdcda1d00ea5997b826.zip
gcc-9c5bd1e9811babe255ddbbdcda1d00ea5997b826.tar.gz
gcc-9c5bd1e9811babe255ddbbdcda1d00ea5997b826.tar.bz2
vect: Replace hardcoded inner loop cost factor
This patch replaces the hardcoded weight factor 50, which the loop vectorizer applies to the cost of statements in an inner loop relative to the loop being vectorized, with a newly added member inner_loop_cost_factor in loop vinfo. It also introduces a parameter vect-inner-loop-cost-factor, whose default value is 50 and which is used to initialize the inner_loop_cost_factor member.

The motivation is this: if a target wants a single function that gathers some information in each add_stmt_cost call, then regardless of whether that function is placed before or after the cost tweaking for inner loops, it may need to adjust (expand or shrink) the gathered data by the same factor. While the factor is hardcoded, this is not easy to maintain.

Bootstrapped/regtested on powerpc64le-linux-gnu P9, x86_64-redhat-linux and aarch64-linux-gnu.

gcc/ChangeLog:

* doc/invoke.texi (vect-inner-loop-cost-factor): Document new parameter.
* params.opt (vect-inner-loop-cost-factor): New.
* targhooks.c (default_add_stmt_cost): Replace hardcoded factor 50 with LOOP_VINFO_INNER_LOOP_COST_FACTOR, include header file tree-vectorizer.h and the headers it requires.
* config/aarch64/aarch64.c (aarch64_add_stmt_cost): Replace hardcoded factor 50 with LOOP_VINFO_INNER_LOOP_COST_FACTOR.
* config/arm/arm.c (arm_add_stmt_cost): Likewise.
* config/i386/i386.c (ix86_add_stmt_cost): Likewise.
* config/rs6000/rs6000.c (rs6000_add_stmt_cost): Likewise.
* tree-vect-loop.c (vect_compute_single_scalar_iteration_cost): Likewise.
(_loop_vec_info::_loop_vec_info): Init inner_loop_cost_factor.
* tree-vectorizer.h (_loop_vec_info): Add inner_loop_cost_factor.
(LOOP_VINFO_INNER_LOOP_COST_FACTOR): New macro.
-rw-r--r-- gcc/config/aarch64/aarch64.c 5
-rw-r--r-- gcc/config/arm/arm.c 6
-rw-r--r-- gcc/config/i386/i386.c 6
-rw-r--r-- gcc/config/rs6000/rs6000.c 6
-rw-r--r-- gcc/doc/invoke.texi 5
-rw-r--r-- gcc/params.opt 4
-rw-r--r-- gcc/targhooks.c 9
-rw-r--r-- gcc/tree-vect-loop.c 3
-rw-r--r-- gcc/tree-vectorizer.h 5
9 files changed, 43 insertions, 6 deletions
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0835646..c1e451e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -15435,7 +15435,10 @@ aarch64_add_stmt_cost (class vec_info *vinfo, void *data, int count,
arbitrary and could potentially be improved with analysis. */
if (where == vect_body && stmt_info
&& stmt_in_inner_loop_p (vinfo, stmt_info))
- count *= 50; /* FIXME */
+ {
+ gcc_assert (loop_vinfo);
+ count *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo); /* FIXME */
+ }
retval = (unsigned) (count * stmt_cost);
costs->region[where] += retval;
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 28cfd81..caf4e56 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -12226,7 +12226,11 @@ arm_add_stmt_cost (vec_info *vinfo, void *data, int count,
arbitrary and could potentially be improved with analysis. */
if (where == vect_body && stmt_info
&& stmt_in_inner_loop_p (vinfo, stmt_info))
- count *= 50; /* FIXME. */
+ {
+ loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
+ gcc_assert (loop_vinfo);
+ count *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo); /* FIXME. */
+ }
retval = (unsigned) (count * stmt_cost);
cost[where] += retval;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 743d8a2..f3b4518 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -22482,7 +22482,11 @@ ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count,
arbitrary and could potentially be improved with analysis. */
if (where == vect_body && stmt_info
&& stmt_in_inner_loop_p (vinfo, stmt_info))
- count *= 50; /* FIXME. */
+ {
+ loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
+ gcc_assert (loop_vinfo);
+ count *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo); /* FIXME. */
+ }
retval = (unsigned) (count * stmt_cost);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6db450a..dfa517b 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5364,7 +5364,11 @@ rs6000_add_stmt_cost (class vec_info *vinfo, void *data, int count,
arbitrary and could potentially be improved with analysis. */
if (where == vect_body && stmt_info
&& stmt_in_inner_loop_p (vinfo, stmt_info))
- count *= 50; /* FIXME. */
+ {
+ loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
+ gcc_assert (loop_vinfo);
+ count *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo); /* FIXME. */
+ }
retval = (unsigned) (count * stmt_cost);
cost_data->cost[where] += retval;
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 5e5fb16..49c74f3 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -14226,6 +14226,11 @@ code to iterate. 2 allows partial vector loads and stores in all loops.
The parameter only has an effect on targets that support partial
vector loads and stores.
+@item vect-inner-loop-cost-factor
+The factor which the loop vectorizer applies to the cost of statements
+in an inner loop relative to the loop being vectorized. The default
+value is 50.
+
@item avoid-fma-max-bits
Maximum number of bits for which we avoid creating FMAs.
diff --git a/gcc/params.opt b/gcc/params.opt
index 2e4cbdd..82600b9 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -1089,4 +1089,8 @@ Bound on number of runtime checks inserted by the vectorizer's loop versioning f
Common Joined UInteger Var(param_vect_partial_vector_usage) Init(2) IntegerRange(0, 2) Param Optimization
Controls how loop vectorizer uses partial vectors. 0 means never, 1 means only for loops whose need to iterate can be removed, 2 means for all loops. The default value is 2.
+-param=vect-inner-loop-cost-factor=
+Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 999999) Param Optimization
+The factor which the loop vectorizer applies to the cost of statements in an inner loop relative to the loop being vectorized.
+
; This comment is to ensure we retain the blank line above.
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 1947ef2..2d2de04 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -90,6 +90,9 @@ along with GCC; see the file COPYING3. If not see
#include "attribs.h"
#include "asan.h"
#include "emit-rtl.h"
+#include "gimple.h"
+#include "cfgloop.h"
+#include "tree-vectorizer.h"
bool
default_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
@@ -1480,7 +1483,11 @@ default_add_stmt_cost (class vec_info *vinfo, void *data, int count,
arbitrary and could potentially be improved with analysis. */
if (where == vect_body && stmt_info
&& stmt_in_inner_loop_p (vinfo, stmt_info))
- count *= 50; /* FIXME. */
+ {
+ loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
+ gcc_assert (loop_vinfo);
+ count *= LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo);
+ }
retval = (unsigned) (count * stmt_cost);
cost[where] += retval;
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index f10e66a..ff7673d 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -836,6 +836,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
single_scalar_iteration_cost (0),
vec_outside_cost (0),
vec_inside_cost (0),
+ inner_loop_cost_factor (param_vect_inner_loop_cost_factor),
vectorizable (false),
can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
using_partial_vectors_p (false),
@@ -1237,7 +1238,7 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
/* FORNOW. */
innerloop_iters = 1;
if (loop->inner)
- innerloop_iters = 50; /* FIXME */
+ innerloop_iters = LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo);
for (i = 0; i < nbbs; i++)
{
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 8d1ffaf..7dcb4cd 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -689,6 +689,10 @@ public:
/* The cost of the vector loop body. */
int vec_inside_cost;
+ /* The factor used to over weight those statements in an inner loop
+ relative to the loop being vectorized. */
+ unsigned int inner_loop_cost_factor;
+
/* Is the loop vectorizable? */
bool vectorizable;
@@ -807,6 +811,7 @@ public:
#define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost
#define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info
#define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond
+#define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
#define LOOP_VINFO_FULLY_MASKED_P(L) \
(LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \