diff options
-rw-r--r-- | gcc/ChangeLog | 18 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/g++.dg/tree-ssa/pr90078.C | 199 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-ivopts.c | 56 |
4 files changed, 253 insertions, 25 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index be91d03..08b37f2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,23 @@ 2019-05-08 Bin Cheng <bin.cheng@linux.alibaba.com> + PR tree-optimization/90078 + * tree-ssa-loop-ivopts.c (inttypes.h): Include new header file. + (INFTY): Increase the value for infinite cost. + (struct comp_cost): Promote type of members to int64_t. + (infinite_cost): Don't set complexity in initialization. + (comp_cost::operator +,-,+=,-=,/=,*=): Assert when cost computation + overflows to infinite_cost. + (adjust_setup_cost): Promote type of parameter and cost computation + to int64_t. + (struct ainc_cost_data, struct iv_ca): Promote type of member to + int64_t. + (get_scaled_computation_cost_at, determine_iv_cost): Promote type of + cost computation to int64_t. + (determine_group_iv_costs, iv_ca_dump, find_optimal_iv_set): Use + int64_t's format specifier in dump. + +2019-05-08 Bin Cheng <bin.cheng@linux.alibaba.com> + PR tree-optimization/90240 * tree-ssa-loop-ivopts.c (get_scaled_computation_cost_at): Scale cost with respect to scaling factor pre-computed for each basic block. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 897a46a..3042f82 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,10 @@ 2018-05-08 Bin Cheng <bin.cheng@linux.alibaba.com> + PR tree-optimization/90078 + * g++.dg/tree-ssa/pr90078.C: New test. + +2018-05-08 Bin Cheng <bin.cheng@linux.alibaba.com> + PR tree-optimization/90240 * gfortran.dg/graphite/pr90240.f: New test. 
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr90078.C b/gcc/testsuite/g++.dg/tree-ssa/pr90078.C new file mode 100644 index 0000000..e36f50e --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr90078.C @@ -0,0 +1,199 @@ +// { dg-do compile } +// { dg-options "-std=c++14 -O2 -ftemplate-depth=1000000" } + +template <class T, int Dim0, int Dim1, int Dim2> struct Tensor3; +template <class A, class T, int Dim0, int Dim1, int Dim2, char i, char j, + char k> +struct Tensor3_Expr; + +template <class T, int Dim0, int Dim1, int Dim2, int Dim3> struct Tensor4; +template <class A, class T, int Dim0, int Dim1, int Dim2, int Dim3, char i, + char j, char k, char l> +struct Tensor4_Expr; + +template <char i, int Dim> struct Index +{}; +template <const int N> struct Number +{ + Number(){}; + operator int() const { return N; } +}; + +template <class T, int Tensor_Dim0, int Tensor_Dim1, int Tensor_Dim2> +struct Tensor3 +{ + T data[Tensor_Dim0][Tensor_Dim1][Tensor_Dim2]; + + T operator()(const int N1, const int N2, const int N3) const + { + return data[N1][N2][N3]; + } + + template <char i, char j, char k, int Dim0, int Dim1, int Dim2> + Tensor3_Expr<const Tensor3<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2>, T, + Dim0, Dim1, Dim2, i, j, k> + operator()(const Index<i, Dim0>, const Index<j, Dim1>, + const Index<k, Dim2>) const + { + return Tensor3_Expr<const Tensor3<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2>, + T, Dim0, Dim1, Dim2, i, j, k>(*this); + } +}; + +template <class A, class T, int Dim0, int Dim1, int Dim2, char i, char j, + char k> +struct Tensor3_Expr +{ + A iter; + + Tensor3_Expr(const A &a) : iter(a) {} + T operator()(const int N1, const int N2, const int N3) const + { + return iter(N1, N2, N3); + } +}; + +template <class A, class T, int Tensor_Dim0, int Tensor_Dim1, int Tensor_Dim2, + int Dim0, int Dim1, int Dim2, char i, char j, char k> +struct Tensor3_Expr<Tensor3<A, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2>, T, Dim0, + Dim1, Dim2, i, j, k> +{ + Tensor3<A, Tensor_Dim0, 
Tensor_Dim1, Tensor_Dim2> &iter; + + Tensor3_Expr(Tensor3<A, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2> &a) : iter(a) + {} + T operator()(const int N1, const int N2, const int N3) const + { + return iter(N1, N2, N3); + } +}; + +template <class A, class B, class T, class U, int Dim0, int Dim1, int Dim23, + int Dim4, int Dim5, char i, char j, char k, char l, char m> +struct Tensor3_times_Tensor3_21 +{ + Tensor3_Expr<A, T, Dim0, Dim1, Dim23, i, j, k> iterA; + Tensor3_Expr<B, U, Dim23, Dim4, Dim5, k, l, m> iterB; + + template <int CurrentDim> + T eval(const int N1, const int N2, const int N3, const int N4, + const Number<CurrentDim> &) const + { + return iterA(N1, N2, CurrentDim - 1) * iterB(CurrentDim - 1, N3, N4) + + eval(N1, N2, N3, N4, Number<CurrentDim - 1>()); + } + T eval(const int N1, const int N2, const int N3, const int N4, + const Number<1> &) const + { + return iterA(N1, N2, 0) * iterB(0, N3, N4); + } + + Tensor3_times_Tensor3_21( + const Tensor3_Expr<A, T, Dim0, Dim1, Dim23, i, j, k> &a, + const Tensor3_Expr<B, U, Dim23, Dim4, Dim5, k, l, m> &b) + : iterA(a), iterB(b) + {} + T operator()(const int &N1, const int &N2, const int &N3, + const int &N4) const + { + return eval(N1, N2, N3, N4, Number<Dim23>()); + } +}; + +template <class A, class B, class T, class U, int Dim0, int Dim1, int Dim23, + int Dim4, int Dim5, char i, char j, char k, char l, char m> +Tensor4_Expr<Tensor3_times_Tensor3_21<A, B, T, U, Dim0, Dim1, Dim23, Dim4, + Dim5, i, j, k, l, m>, + T, Dim0, Dim1, Dim4, Dim5, i, j, l, m> +operator*(const Tensor3_Expr<A, T, Dim0, Dim1, Dim23, i, j, k> &a, + const Tensor3_Expr<B, U, Dim23, Dim4, Dim5, k, l, m> &b) +{ + using TensorExpr = Tensor3_times_Tensor3_21<A, B, T, U, Dim0, Dim1, Dim23, + Dim4, Dim5, i, j, k, l, m>; + return Tensor4_Expr<TensorExpr, T, Dim0, Dim1, Dim4, Dim5, i, j, l, m>( + TensorExpr(a, b)); +}; + +template <class T, int Tensor_Dim0, int Tensor_Dim1, int Tensor_Dim2, + int Tensor_Dim3> +struct Tensor4 +{ + T 
data[Tensor_Dim0][Tensor_Dim1][Tensor_Dim2][Tensor_Dim3]; + + Tensor4() {} + T &operator()(const int N1, const int N2, const int N3, const int N4) + { + return data[N1][N2][N3][N4]; + } + + template <char i, char j, char k, char l, int Dim0, int Dim1, int Dim2, + int Dim3> + Tensor4_Expr<Tensor4<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2, Tensor_Dim3>, + T, Dim0, Dim1, Dim2, Dim3, i, j, k, l> + operator()(const Index<i, Dim0>, const Index<j, Dim1>, const Index<k, Dim2>, + const Index<l, Dim3>) + { + return Tensor4_Expr< + Tensor4<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2, Tensor_Dim3>, T, Dim0, + Dim1, Dim2, Dim3, i, j, k, l>(*this); + }; +}; + +template <class A, class T, int Dim0, int Dim1, int Dim2, int Dim3, char i, + char j, char k, char l> +struct Tensor4_Expr +{ + A iter; + + Tensor4_Expr(const A &a) : iter(a) {} + T operator()(const int N1, const int N2, const int N3, const int N4) const + { + return iter(N1, N2, N3, N4); + } +}; + +template <class A, class T, int Dim0, int Dim1, int Dim2, int Dim3, char i, + char j, char k, char l> +struct Tensor4_Expr<Tensor4<A, Dim0, Dim1, Dim2, Dim3>, T, Dim0, Dim1, Dim2, + Dim3, i, j, k, l> +{ + Tensor4<A, Dim0, Dim1, Dim2, Dim3> &iter; + + Tensor4_Expr(Tensor4<A, Dim0, Dim1, Dim2, Dim3> &a) : iter(a) {} + T operator()(const int N1, const int N2, const int N3, const int N4) const + { + return iter(N1, N2, N3, N4); + } + + template <class B, class U, int Dim1_0, int Dim1_1, int Dim1_2, int Dim1_3, + char i_1, char j_1, char k_1, char l_1> + auto &operator=(const Tensor4_Expr<B, U, Dim1_0, Dim1_1, Dim1_2, Dim1_3, i_1, + j_1, k_1, l_1> &rhs) + { + for(int ii = 0; ii < Dim0; ++ii) + for(int jj = 0; jj < Dim1; ++jj) + for(int kk = 0; kk < Dim2; ++kk) + for(int ll = 0; ll < Dim3; ++ll) + { + iter(ii, jj, kk, ll) = rhs(ii, jj, kk, ll); + } + return *this; + } +}; + +int main() +{ + Tensor3<float, 100, 100, 1000> t1; + Tensor3<float, 1000, 100, 100> t2; + + Index<'l', 100> l; + Index<'m', 100> m; + Index<'k', 1000> k; + 
Index<'n', 100> n; + Index<'o', 100> o; + + Tensor4<float, 100, 100, 100, 100> res; + res(l, m, n, o) = t1(l, m, k) * t2(k, n, o); + return 0; +} + diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index 534e146..9864b59 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -114,7 +114,7 @@ along with GCC; see the file COPYING3. If not see interface between the GIMPLE and RTL worlds. */ /* The infinite cost. */ -#define INFTY 10000000 +#define INFTY 1000000000 /* Returns the expected number of loop iterations for LOOP. The average trip count is computed from profile data if it @@ -180,7 +180,7 @@ struct comp_cost comp_cost (): cost (0), complexity (0), scratch (0) {} - comp_cost (int cost, unsigned complexity, int scratch = 0) + comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0) : cost (cost), complexity (complexity), scratch (scratch) {} @@ -220,16 +220,16 @@ struct comp_cost /* Returns true if COST1 is smaller or equal than COST2. */ friend bool operator<= (comp_cost cost1, comp_cost cost2); - int cost; /* The runtime cost. */ + int64_t cost; /* The runtime cost. */ unsigned complexity; /* The estimate of the complexity of the code for the computation (in no concrete units -- complexity field should be larger for more complex expressions and addressing modes). */ - int scratch; /* Scratch used during cost computation. */ + int64_t scratch; /* Scratch used during cost computation. 
*/ }; static const comp_cost no_cost; -static const comp_cost infinite_cost (INFTY, INFTY, INFTY); +static const comp_cost infinite_cost (INFTY, 0, INFTY); bool comp_cost::infinite_cost_p () @@ -243,6 +243,7 @@ operator+ (comp_cost cost1, comp_cost cost2) if (cost1.infinite_cost_p () || cost2.infinite_cost_p ()) return infinite_cost; + gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost); cost1.cost += cost2.cost; cost1.complexity += cost2.complexity; @@ -256,6 +257,7 @@ operator- (comp_cost cost1, comp_cost cost2) return infinite_cost; gcc_assert (!cost2.infinite_cost_p ()); + gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost); cost1.cost -= cost2.cost; cost1.complexity -= cost2.complexity; @@ -276,6 +278,7 @@ comp_cost::operator+= (HOST_WIDE_INT c) if (infinite_cost_p ()) return *this; + gcc_assert (this->cost + c < infinite_cost.cost); this->cost += c; return *this; @@ -287,6 +290,7 @@ comp_cost::operator-= (HOST_WIDE_INT c) if (infinite_cost_p ()) return *this; + gcc_assert (this->cost - c < infinite_cost.cost); this->cost -= c; return *this; @@ -295,6 +299,7 @@ comp_cost::operator-= (HOST_WIDE_INT c) comp_cost comp_cost::operator/= (HOST_WIDE_INT c) { + gcc_assert (c != 0); if (infinite_cost_p ()) return *this; @@ -309,6 +314,7 @@ comp_cost::operator*= (HOST_WIDE_INT c) if (infinite_cost_p ()) return *this; + gcc_assert (this->cost * c < infinite_cost.cost); this->cost *= c; return *this; @@ -638,7 +644,7 @@ struct iv_ca comp_cost cand_use_cost; /* Total cost of candidates. */ - unsigned cand_cost; + int64_t cand_cost; /* Number of times each invariant variable is used. */ unsigned *n_inv_var_uses; @@ -4025,16 +4031,16 @@ get_computation_at (struct loop *loop, gimple *at, if we're optimizing for speed, amortize it over the per-iteration cost. If ROUND_UP_P is true, the result is round up rather than to zero when optimizing for speed. 
*/ -static unsigned -adjust_setup_cost (struct ivopts_data *data, unsigned cost, +static int64_t +adjust_setup_cost (struct ivopts_data *data, int64_t cost, bool round_up_p = false) { if (cost == INFTY) return cost; else if (optimize_loop_for_speed_p (data->current_loop)) { - HOST_WIDE_INT niters = avg_loop_niter (data->current_loop); - return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters; + int64_t niters = (int64_t) avg_loop_niter (data->current_loop); + return (cost + (round_up_p ? niters - 1 : 0)) / niters; } else return cost; @@ -4305,7 +4311,7 @@ enum ainc_type struct ainc_cost_data { - unsigned costs[AINC_NONE]; + int64_t costs[AINC_NONE]; }; static comp_cost @@ -4566,12 +4572,12 @@ get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost) if (scale_factor == 1) return cost; - int scaled_cost + int64_t scaled_cost = cost.scratch + (cost.cost - cost.scratch) * scale_factor; if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Scaling cost based on bb prob " - "by %2.2f: %d (scratch: %d) -> %d\n", + fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: " + "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n", 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost); cost.cost = scaled_cost; @@ -5539,7 +5545,7 @@ determine_group_iv_costs (struct ivopts_data *data) || group->cost_map[j].cost.infinite_cost_p ()) continue; - fprintf (dump_file, " %d\t%d\t%d\t", + fprintf (dump_file, " %d\t%" PRId64 "\t%d\t", group->cost_map[j].cand->id, group->cost_map[j].cost.cost, group->cost_map[j].cost.complexity); @@ -5569,7 +5575,7 @@ static void determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand) { comp_cost cost_base; - unsigned cost, cost_step; + int64_t cost, cost_step; tree base; gcc_assert (cand->iv != NULL); @@ -6139,11 +6145,11 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) unsigned i; comp_cost cost = iv_ca_cost (ivs); - fprintf (file, " cost: %d (complexity 
%d)\n", cost.cost, + fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost, cost.complexity); - fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n", - ivs->cand_cost, ivs->cand_use_cost.cost, - ivs->cand_use_cost.complexity); + fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: " + "%" PRId64 " (complexity %d)\n", ivs->cand_cost, + ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity); bitmap_print (file, ivs->cands, " candidates: ","\n"); for (i = 0; i < ivs->upto; i++) @@ -6151,9 +6157,9 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) struct iv_group *group = data->vgroups[i]; struct cost_pair *cp = iv_ca_cand_for_group (ivs, group); if (cp) - fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n", - group->id, cp->cand->id, cp->cost.cost, - cp->cost.complexity); + fprintf (file, " group:%d --> iv_cand:%d, cost=(" + "%" PRId64 ",%d)\n", group->id, cp->cand->id, + cp->cost.cost, cp->cost.complexity); else fprintf (file, " group:%d --> ??\n", group->id); } @@ -6751,9 +6757,9 @@ find_optimal_iv_set (struct ivopts_data *data) if (dump_file && (dump_flags & TDF_DETAILS)) { - fprintf (dump_file, "Original cost %d (complexity %d)\n\n", + fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n", origcost.cost, origcost.complexity); - fprintf (dump_file, "Final cost %d (complexity %d)\n\n", + fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n", cost.cost, cost.complexity); } |