diff options
author | Sebastian Pop <pop@cri.ensmp.fr> | 2004-10-13 05:48:03 +0200 |
---|---|---|
committer | Daniel Berlin <dberlin@gcc.gnu.org> | 2004-10-13 03:48:03 +0000 |
commit | 86df10e35e37a9bc0a50e43503e9347719cc50e7 (patch) | |
tree | 1226bd75bfb69754520d53c7eb3bfc5d227899fa /gcc | |
parent | 06c3418c6cde0cca0caa32123e5ae3e05ea31019 (diff) | |
download | gcc-86df10e35e37a9bc0a50e43503e9347719cc50e7.zip gcc-86df10e35e37a9bc0a50e43503e9347719cc50e7.tar.gz gcc-86df10e35e37a9bc0a50e43503e9347719cc50e7.tar.bz2 |
Makefile.in (tree-ssa-loop-niter.o): Depends on tree-data-ref.h.
2004-10-11 Sebastian Pop <pop@cri.ensmp.fr>
* Makefile.in (tree-ssa-loop-niter.o): Depends on tree-data-ref.h.
* cfgloop.c (initialize_loops_parallel_p): New.
(flow_loops_find): Initialize the parallel_p field to true for all
the loops.
* tree-ssa-loop-niter.c: Include "tree-data-ref.h".
(estimate_numbers_of_iterations_loop): Infers the loop bounds from
the size of the data accessed in the loop.
(struct nb_iter_bound): Moved...
* cfgloop.h (struct nb_iter_bound): ... here.
(estimated_nb_iterations, parallel_p): New fields in struct loop.
(record_estimate): Declare extern here.
* tree-chrec.c: Fix comments.
(nb_vars_in_chrec): New function.
* tree-chrec.h (nb_vars_in_chrec): Declared here.
* tree-data-ref.c: Don't include lambda.h, that is already included
in tree-data-ref.h.
(tree_fold_divides_p): Don't check for integer_onep.
(tree_fold_bezout): Removed.
(gcd): New static duplicated function.
(int_divides_p, dump_subscript): New.
(dump_data_dependence_relation): Use dump_subscript.
(dump_dist_dir_vectors, dump_ddrs, compute_estimated_nb_iterations,
estimate_niter_from_size_of_data): New.
(analyze_array_indexes, analyze_array): Call
estimate_niter_from_size_of_data during the detection of array
references. Pass in a pointer to the statement that contains the
array reference.
(all_chrecs_equal_p): New.
(compute_distance_vector): Renamed compute_subscript_distance.
Deal with multivariate conflict functions.
(initialize_data_dependence_relation): Initialize DDR_AFFINE_P,
DDR_SIZE_VECT, DDR_DIST_VECT, and DDR_DIR_VECT.
(non_affine_dependence_relation): New.
(analyze_ziv_subscript, analyze_siv_subscript_cst_affine,
analyze_siv_subscript, analyze_miv_subscript,
analyze_overlapping_iterations, subscript_dependence_tester):
Initialize and return last_conflicts function.
(initialize_matrix_A, FLOOR, compute_overlap_steps_for_affine_univar,
compute_overlap_steps_for_affine_1_2): New.
(analyze_siv_subscript_affine_cst): Removed.
(analyze_subscript_affine_affine): Disprove dependences based on the
iteration domains. Solve the univariate dependence case as before,
but use lambda_matrix_right_hermite instead of tree_fold_bezout.
Implement the multivariate case of 2 versus 1 variables.
(build_classic_dist_vector, build_classic_dir_vector): Implement some
unhandled cases.
(find_data_references_in_loop): Compute and initialize
loop->estimated_nb_iterations and loop->parallel_p.
(analyze_all_data_dependences): Modify the debug dump order.
* tree-data-ref.h (SUB_LAST_CONFLICT_IN_A, SUB_LAST_CONFLICT_IN_B,
subscript->last_conflict_in_a, subscript->last_conflict_in_b): Removed.
(SUB_LAST_CONFLICT, subscript->last_conflict,
data_dependence_relation->affine_p, data_dependence_relation->size_vect,
DDR_AFFINE_P, DDR_SIZE_VECT): New.
(find_data_references_in_loop, initialize_data_dependence_relation,
dump_subscript, dump_ddrs, dump_dist_dir_vectors): Declared here.
From-SVN: r88965
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 59 | ||||
-rw-r--r-- | gcc/Makefile.in | 2 | ||||
-rw-r--r-- | gcc/cfgloop.c | 15 | ||||
-rw-r--r-- | gcc/cfgloop.h | 25 | ||||
-rw-r--r-- | gcc/tree-chrec.c | 24 | ||||
-rw-r--r-- | gcc/tree-chrec.h | 1 | ||||
-rw-r--r-- | gcc/tree-data-ref.c | 1307 | ||||
-rw-r--r-- | gcc/tree-data-ref.h | 28 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-niter.c | 27 |
9 files changed, 1033 insertions, 455 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5244cb3..5083211 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,62 @@ +2004-10-11 Sebastian Pop <pop@cri.ensmp.fr> + + * Makefile.in (tree-ssa-loop-niter.o): Depends on tree-data-ref.h. + * cfgloop.c (initialize_loops_parallel_p): New. + (flow_loops_find): Initialize the parallel_p field to true for all + the loops. + * tree-ssa-loop-niter.c: Include "tree-data-ref.h". + (estimate_numbers_of_iterations_loop): Infers the loop bounds from + the size of the data accessed in the loop. + (struct nb_iter_bound): Moved... + * cfgloop.h (struct nb_iter_bound): ... here. + (estimated_nb_iterations, parallel_p): New fields in struct loop. + (record_estimate): Declare extern here. + * tree-chrec.c: Fix comments. + (nb_vars_in_chrec): New function. + * tree-chrec.h (nb_vars_in_chrec): Declared here. + * tree-data-ref.c: Don't include lambda.h, that is already included + in tree-data-ref.h. + (tree_fold_divides_p): Don't check for integer_onep. + (tree_fold_bezout): Removed. + (gcd): New static duplicated function. + (int_divides_p, dump_subscript): New. + (dump_data_dependence_relation): Use dump_subscript. + (dump_dist_dir_vectors, dump_ddrs, compute_estimated_nb_iterations, + estimate_niter_from_size_of_data): New. + (analyze_array_indexes, analyze_array): Call + estimate_niter_from_size_of_data during the detection of array + references. Pass in a pointer to the statement that contains the + array reference. + (all_chrecs_equal_p): New. + (compute_distance_vector): Renamed compute_subscript_distance. + Deal with multivariate conflict functions. + (initialize_data_dependence_relation): Initialize DDR_AFFINE_P, + DDR_SIZE_VECT, DDR_DIST_VECT, and DDR_DIR_VECT. + (non_affine_dependence_relation): New. + (analyze_ziv_subscript, analyze_siv_subscript_cst_affine, + analyze_siv_subscript, analyze_miv_subscript, + analyze_overlapping_iterations, subscript_dependence_tester): + Initialize and return last_conflicts function. + (initialize_matrix_A, FLOOR, compute_overlap_steps_for_affine_univar, + compute_overlap_steps_for_affine_1_2): New. + (analyze_siv_subscript_affine_cst): Removed. + (analyze_subscript_affine_affine): Disprove dependences based on the + iteration domains. Solve the univariate dependence case as before, + but use lambda_matrix_right_hermite instead of tree_fold_bezout. + Implement the multivariate case of 2 versus 1 variables. + (build_classic_dist_vector, build_classic_dir_vector): Implement some + unhandled cases. + (find_data_references_in_loop): Compute and initialize + loop->estimated_nb_iterations and loop->parallel_p. + (analyze_all_data_dependences): Modify the debug dump order. + * tree-data-ref.h (SUB_LAST_CONFLICT_IN_A, SUB_LAST_CONFLICT_IN_B, + subscript->last_conflict_in_a, subscript->last_conflict_in_b): Removed. + (SUB_LAST_CONFLICT, subscript->last_conflict, + data_dependence_relation->affine_p, data_dependence_relation->size_vect, + DDR_AFFINE_P, DDR_SIZE_VECT): New. + (find_data_references_in_loop, initialize_data_dependence_relation, + dump_subscript, dump_ddrs, dump_dist_dir_vectors): Declared here. + 2004-10-12 Kelley Cook <kcook@gcc.gnu.org> * configure: Regenerate. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 50cf9dd..622ecd2 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1706,7 +1706,7 @@ tree-ssa-loop-unswitch.o : tree-ssa-loop-unswitch.c $(TREE_FLOW_H) $(CONFIG_H) \ tree-ssa-loop-niter.o : tree-ssa-loop-niter.c $(TREE_FLOW_H) $(CONFIG_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) cfgloop.h $(PARAMS_H) tree-inline.h \ output.h diagnostic.h $(TM_H) coretypes.h $(TREE_DUMP_H) $(FLAGS_H) \ - tree-pass.h $(SCEV_H) + tree-pass.h $(SCEV_H) tree-data-ref.h tree-ssa-loop-ivcanon.o : tree-ssa-loop-ivcanon.c $(TREE_FLOW_H) $(CONFIG_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(PARAMS_H) tree-inline.h \ output.h diagnostic.h $(TM_H) coretypes.h $(TREE_DUMP_H) $(FLAGS_H) \ diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c index a38af16..b995b39 100644 --- a/gcc/cfgloop.c +++ b/gcc/cfgloop.c @@ -780,6 +780,20 @@ canonicalize_loop_headers (void) #endif } +/* Initialize all the parallel_p fields of the loops structure to true. */ + +static void +initialize_loops_parallel_p (struct loops *loops) +{ + unsigned int i; + + for (i = 0; i < loops->num; i++) + { + struct loop *loop = loops->parray[i]; + loop->parallel_p = true; + } +} + /* Find all the natural loops in the function and save in LOOPS structure and recalculate loop_depth information in basic block structures. FLAGS controls which loop information is collected. Return the number of natural @@ -945,6 +959,7 @@ flow_loops_find (struct loops *loops, int flags) flow_loop_scan (loops->parray[i], flags); loops->num = num_loops; + initialize_loops_parallel_p (loops); } sbitmap_free (headers); diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 1b45a56..5de3f64 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -43,6 +43,20 @@ struct lpt_decision unsigned times; }; +/* The structure describing a bound on number of iterations of a loop. */ + +struct nb_iter_bound +{ + tree bound; /* The expression whose value is an upper bound on the + number of executions of anything after ... */ + tree at_stmt; /* ... this statement during one execution of loop. */ + tree additional; /* A conjunction of conditions the operands of BOUND + satisfy. The additional information about the value + of the bound may be derived from it. */ + struct nb_iter_bound *next; + /* The next bound in a list. */ +}; + /* Structure to hold information for each natural loop. */ struct loop { @@ -173,12 +187,22 @@ struct loop information in this field. */ tree nb_iterations; + /* An INTEGER_CST estimation of the number of iterations. NULL_TREE + if there is no estimation. */ + tree estimated_nb_iterations; + /* Upper bound on number of iterations of a loop. */ struct nb_iter_bound *bounds; /* If not NULL, loop has just single exit edge stored here (edges to the EXIT_BLOCK_PTR do not count. */ edge single_exit; + + /* True when the loop does not carry data dependences, and + consequently the iterations can be executed in any order. False + when the loop carries data dependences, or when the property is + not decidable. */ + bool parallel_p; }; /* Flags for state of loop structure. */ @@ -462,6 +486,7 @@ enum extern void unroll_and_peel_loops (struct loops *, int); extern void doloop_optimize_loops (struct loops *); extern void move_loop_invariants (struct loops *); +extern void record_estimate (struct loop *, tree, tree, tree); /* Old loop optimizer interface. */ diff --git a/gcc/tree-chrec.c b/gcc/tree-chrec.c index 00f3b5f..be0d80a 100644 --- a/gcc/tree-chrec.c +++ b/gcc/tree-chrec.c @@ -636,7 +636,7 @@ chrec_component_in_loop_num (tree chrec, } /* Returns the evolution part in LOOP_NUM. Example: the call - evolution_part_in_loop_num (1, {{0, +, 1}_1, +, 2}_1) returns + evolution_part_in_loop_num ({{0, +, 1}_1, +, 2}_1, 1) returns {1, +, 2}_1 */ tree @@ -647,7 +647,7 @@ evolution_part_in_loop_num (tree chrec, } /* Returns the initial condition in LOOP_NUM. Example: the call - initial_condition_in_loop_num ({{0, +, 1}_1, +, 2}_2, 1) returns + initial_condition_in_loop_num ({{0, +, 1}_1, +, 2}_2, 2) returns {0, +, 1}_1 */ tree @@ -932,6 +932,26 @@ evolution_function_is_univariate_p (tree chrec) } } +/* Returns the number of variables of CHREC. Example: the call + nb_vars_in_chrec ({{0, +, 1}_5, +, 2}_6) returns 2. */ + +unsigned +nb_vars_in_chrec (tree chrec) +{ + if (chrec == NULL_TREE) + return 0; + + switch (TREE_CODE (chrec)) + { + case POLYNOMIAL_CHREC: + return 1 + nb_vars_in_chrec + (initial_condition_in_loop_num (chrec, CHREC_VARIABLE (chrec))); + + default: + return 0; + } +} + /* Convert the initial condition of chrec to type. */ diff --git a/gcc/tree-chrec.h b/gcc/tree-chrec.h index ee31296..bc722d8 100644 --- a/gcc/tree-chrec.h +++ b/gcc/tree-chrec.h @@ -91,6 +91,7 @@ extern bool chrec_contains_undetermined (tree); extern bool tree_contains_chrecs (tree); extern bool evolution_function_is_affine_multivariate_p (tree); extern bool evolution_function_is_univariate_p (tree); +extern unsigned nb_vars_in_chrec (tree); diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 069dcde..213f8d4 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -44,7 +44,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA - polyhedron dependence or with the chains of recurrences based representation, - - to define a knowledge base for storing the data dependences + - to define a knowledge base for storing the data dependeces information, - to define an interface to access this data. @@ -94,7 +94,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "tree-data-ref.h" #include "tree-scalar-evolution.h" #include "tree-pass.h" -#include "lambda.h" /* This is the simplest data dependence test: determines whether the data references A and B access the same array/region. Returns @@ -113,6 +112,7 @@ array_base_name_differ_p (struct data_reference *a, tree ta = TREE_TYPE (base_a); tree tb = TREE_TYPE (base_b); + /* Determine if same base. Example: for the array accesses a[i], b[i] or pointer accesses *a, *b, bases are a, b. */ if (base_a == base_b) @@ -130,7 +130,7 @@ array_base_name_differ_p (struct data_reference *a, return true; } - /* record/union based accesses - s.a[i], t.b[j]. bases are s.a,t.b. */ + /* Record/union based accesses - s.a[i], t.b[j]. bases are s.a,t.b. */ if (TREE_CODE (base_a) == COMPONENT_REF && TREE_CODE (base_b) == COMPONENT_REF && TREE_OPERAND (base_a, 0) == TREE_OPERAND (base_b, 0) && TREE_OPERAND (base_a, 1) == TREE_OPERAND (base_b, 1)) @@ -139,6 +139,7 @@ array_base_name_differ_p (struct data_reference *a, return true; } + /* Determine if different bases. */ /* At this point we know that base_a != base_b. However, pointer @@ -206,109 +207,39 @@ tree_fold_divides_p (tree type, tree a, tree b) { - if (integer_onep (a)) - return true; - /* Determines whether (A == gcd (A, B)). */ return integer_zerop (fold (build (MINUS_EXPR, type, a, tree_fold_gcd (a, b)))); } -/* Bezout: Let A1 and A2 be two integers; there exist two integers U11 - and U12 such that, - - | U11 * A1 + U12 * A2 = gcd (A1, A2). - - This function computes the greatest common divisor using the - Blankinship algorithm. The gcd is returned, and the coefficients - of the unimodular matrix U are (U11, U12, U21, U22) such that, - - | U.A = S - - | (U11 U12) (A1) = (gcd) - | (U21 U22) (A2) (0) - - FIXME: Use lambda_..._hermite for implementing this function. -*/ +/* Compute the greatest common denominator of two numbers using + Euclid's algorithm. */ -static tree -tree_fold_bezout (tree a1, - tree a2, - tree *u11, tree *u12, - tree *u21, tree *u22) +static int +gcd (int a, int b) { - tree s1, s2; - - /* Initialize S with the coefficients of A. */ - s1 = a1; - s2 = a2; - - /* Initialize the U matrix */ - *u11 = integer_one_node; - *u12 = integer_zero_node; - *u21 = integer_zero_node; - *u22 = integer_one_node; - if (integer_zerop (a1) - || integer_zerop (a2)) - return integer_zero_node; - - while (!integer_zerop (s2)) - { - int sign; - tree z, zu21, zu22, zs2; - - sign = tree_int_cst_sgn (s1) * tree_int_cst_sgn (s2); - z = fold (build (FLOOR_DIV_EXPR, integer_type_node, - fold (build1 (ABS_EXPR, integer_type_node, s1)), - fold (build1 (ABS_EXPR, integer_type_node, s2)))); - zu21 = fold (build (MULT_EXPR, integer_type_node, z, *u21)); - zu22 = fold (build (MULT_EXPR, integer_type_node, z, *u22)); - zs2 = fold (build (MULT_EXPR, integer_type_node, z, s2)); - - /* row1 -= z * row2. */ - gcc_assert (sign != 0); - if (sign < 0) - { - *u11 = fold (build (PLUS_EXPR, integer_type_node, *u11, zu21)); - *u12 = fold (build (PLUS_EXPR, integer_type_node, *u12, zu22)); - s1 = fold (build (PLUS_EXPR, integer_type_node, s1, zs2)); - } - else - { - *u11 = fold (build (MINUS_EXPR, integer_type_node, *u11, zu21)); - *u12 = fold (build (MINUS_EXPR, integer_type_node, *u12, zu22)); - s1 = fold (build (MINUS_EXPR, integer_type_node, s1, zs2)); - } - - /* Interchange row1 and row2. */ - { - tree flip; - - flip = *u11; - *u11 = *u21; - *u21 = flip; - - flip = *u12; - *u12 = *u22; - *u22 = flip; - - flip = s1; - s1 = s2; - s2 = flip; - } - } + int x, y, z; - if (tree_int_cst_sgn (s1) < 0) + x = abs (a); + y = abs (b); + + while (x>0) { - *u11 = fold (build (MULT_EXPR, integer_type_node, *u11, - integer_minus_one_node)); - *u12 = fold (build (MULT_EXPR, integer_type_node, *u12, - integer_minus_one_node)); - s1 = fold (build (MULT_EXPR, integer_type_node, s1, integer_minus_one_node)); + z = y % x; + y = x; + x = z; } - - return s1; + + return (y); +} + +/* Returns true iff A divides B. */ + +static inline bool +int_divides_p (int a, int b) +{ + return ((b % a) == 0); } @@ -360,83 +291,85 @@ dump_data_reference (FILE *outf, fprintf (outf, ")\n"); } +/* Dump function for a SUBSCRIPT structure. */ + +void +dump_subscript (FILE *outf, struct subscript *subscript) +{ + tree chrec = SUB_CONFLICTS_IN_A (subscript); + + fprintf (outf, "\n (subscript \n"); + fprintf (outf, " iterations_that_access_an_element_twice_in_A: "); + print_generic_stmt (outf, chrec, 0); + if (chrec == chrec_known) + fprintf (outf, " (no dependence)\n"); + else if (chrec_contains_undetermined (chrec)) + fprintf (outf, " (don't know)\n"); + else + { + tree last_iteration = SUB_LAST_CONFLICT (subscript); + fprintf (outf, " last_conflict: "); + print_generic_stmt (outf, last_iteration, 0); + } + + chrec = SUB_CONFLICTS_IN_B (subscript); + fprintf (outf, " iterations_that_access_an_element_twice_in_B: "); + print_generic_stmt (outf, chrec, 0); + if (chrec == chrec_known) + fprintf (outf, " (no dependence)\n"); + else if (chrec_contains_undetermined (chrec)) + fprintf (outf, " (don't know)\n"); + else + { + tree last_iteration = SUB_LAST_CONFLICT (subscript); + fprintf (outf, " last_conflict: "); + print_generic_stmt (outf, last_iteration, 0); + } + + fprintf (outf, " (Subscript distance: "); + print_generic_stmt (outf, SUB_DISTANCE (subscript), 0); + fprintf (outf, " )\n"); + fprintf (outf, " )\n"); +} + /* Dump function for a DATA_DEPENDENCE_RELATION structure. */ void dump_data_dependence_relation (FILE *outf, struct data_dependence_relation *ddr) { - unsigned int i; struct data_reference *dra, *drb; - + dra = DDR_A (ddr); drb = DDR_B (ddr); - - if (dra && drb) - fprintf (outf, "(Data Dep:"); - else - fprintf (outf, "(Data Dep:"); - - if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr))) + fprintf (outf, "(Data Dep: \n"); + if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) fprintf (outf, " (don't know)\n"); else if (DDR_ARE_DEPENDENT (ddr) == chrec_known) fprintf (outf, " (no dependence)\n"); - else + else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) { + unsigned int i; for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) { - tree chrec; - struct subscript *subscript = DDR_SUBSCRIPT (ddr, i); - - fprintf (outf, "\n (subscript %d:\n", i); fprintf (outf, " access_fn_A: "); print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0); fprintf (outf, " access_fn_B: "); print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0); - - chrec = SUB_CONFLICTS_IN_A (subscript); - fprintf (outf, " iterations_that_access_an_element_twice_in_A: "); - print_generic_stmt (outf, chrec, 0); - if (chrec == chrec_known) - fprintf (outf, " (no dependence)\n"); - else if (chrec_contains_undetermined (chrec)) - fprintf (outf, " (don't know)\n"); - else - { - tree last_iteration = SUB_LAST_CONFLICT_IN_A (subscript); - fprintf (outf, " last_iteration_that_access_an_element_twice_in_A: "); - print_generic_stmt (outf, last_iteration, 0); - } - - chrec = SUB_CONFLICTS_IN_B (subscript); - fprintf (outf, " iterations_that_access_an_element_twice_in_B: "); - print_generic_stmt (outf, chrec, 0); - if (chrec == chrec_known) - fprintf (outf, " (no dependence)\n"); - else if (chrec_contains_undetermined (chrec)) - fprintf (outf, " (don't know)\n"); - else - { - tree last_iteration = SUB_LAST_CONFLICT_IN_B (subscript); - fprintf (outf, " last_iteration_that_access_an_element_twice_in_B: "); - print_generic_stmt (outf, last_iteration, 0); - } - - fprintf (outf, " )\n"); + dump_subscript (outf, DDR_SUBSCRIPT (ddr, i)); } - - fprintf (outf, " (Distance Vector: \n"); - for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) + if (DDR_DIST_VECT (ddr)) { - struct subscript *subscript = DDR_SUBSCRIPT (ddr, i); - - fprintf (outf, "("); - print_generic_stmt (outf, SUB_DISTANCE (subscript), 0); - fprintf (outf, ")\n"); + fprintf (outf, " distance_vect: "); + print_lambda_vector (outf, DDR_DIST_VECT (ddr), DDR_SIZE_VECT (ddr)); + } + if (DDR_DIR_VECT (ddr)) + { + fprintf (outf, " direction_vect: "); + print_lambda_vector (outf, DDR_DIR_VECT (ddr), DDR_SIZE_VECT (ddr)); } - fprintf (outf, " )\n"); } fprintf (outf, ")\n"); @@ -485,8 +418,120 @@ dump_data_dependence_direction (FILE *file, } } +/* Dumps the distance and direction vectors in FILE. DDRS contains + the dependence relations, and VECT_SIZE is the size of the + dependence vectors, or in other words the number of loops in the + considered nest. */ + +void +dump_dist_dir_vectors (FILE *file, varray_type ddrs) +{ + unsigned int i; + + for (i = 0; i < VARRAY_ACTIVE_SIZE (ddrs); i++) + { + struct data_dependence_relation *ddr = + (struct data_dependence_relation *) + VARRAY_GENERIC_PTR (ddrs, i); + if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE + && DDR_AFFINE_P (ddr)) + { + fprintf (file, "DISTANCE_V ("); + print_lambda_vector (file, DDR_DIST_VECT (ddr), DDR_SIZE_VECT (ddr)); + fprintf (file, ")\n"); + fprintf (file, "DIRECTION_V ("); + print_lambda_vector (file, DDR_DIR_VECT (ddr), DDR_SIZE_VECT (ddr)); + fprintf (file, ")\n"); + } + } + fprintf (file, "\n\n"); +} + +/* Dumps the data dependence relations DDRS in FILE. */ + +void +dump_ddrs (FILE *file, varray_type ddrs) +{ + unsigned int i; + + for (i = 0; i < VARRAY_ACTIVE_SIZE (ddrs); i++) + { + struct data_dependence_relation *ddr = + (struct data_dependence_relation *) + VARRAY_GENERIC_PTR (ddrs, i); + dump_data_dependence_relation (file, ddr); + } + fprintf (file, "\n\n"); +} + +/* Compute the lowest iteration bound for LOOP. It is an + INTEGER_CST. */ + +static void +compute_estimated_nb_iterations (struct loop *loop) +{ + tree estimation; + struct nb_iter_bound *bound, *next; + + for (bound = loop->bounds; bound; bound = next) + { + next = bound->next; + estimation = bound->bound; + + if (TREE_CODE (estimation) != INTEGER_CST) + continue; + + if (loop->estimated_nb_iterations) + { + /* Update only if estimation is smaller. */ + if (tree_int_cst_lt (estimation, loop->estimated_nb_iterations)) + loop->estimated_nb_iterations = estimation; + } + else + loop->estimated_nb_iterations = estimation; + } +} + +/* Estimate the number of iterations from the size of the data and the + access functions. */ + +static void +estimate_niter_from_size_of_data (struct loop *loop, + tree opnd0, + tree access_fn, + tree stmt) +{ + tree estimation; + tree array_size, data_size, element_size; + tree init, step; + + init = initial_condition (access_fn); + step = evolution_part_in_loop_num (access_fn, loop->num); + + array_size = TYPE_SIZE (TREE_TYPE (opnd0)); + element_size = TYPE_SIZE (TREE_TYPE (TREE_TYPE (opnd0))); + if (array_size == NULL_TREE + || element_size == NULL_TREE) + return; + + data_size = fold (build2 (EXACT_DIV_EXPR, integer_type_node, + array_size, element_size)); + + if (init != NULL_TREE + && step != NULL_TREE + && TREE_CODE (init) == INTEGER_CST + && TREE_CODE (step) == INTEGER_CST) + { + estimation = fold (build2 (CEIL_DIV_EXPR, integer_type_node, + fold (build2 (MINUS_EXPR, integer_type_node, + data_size, init)), step)); + + record_estimate (loop, estimation, boolean_true_node, stmt); + } +} + /* Given an ARRAY_REF node REF, records its access functions. Example: given A[i][3], record in ACCESS_FNS the opnd1 function, i.e. the constant "3", then recursively call the function on opnd0, @@ -496,7 +541,7 @@ dump_data_dependence_direction (FILE *file, static tree analyze_array_indexes (struct loop *loop, varray_type *access_fns, - tree ref) + tree ref, tree stmt) { tree opnd0, opnd1; tree access_fn; @@ -510,12 +555,15 @@ analyze_array_indexes (struct loop *loop, the optimizers. */ access_fn = instantiate_parameters (loop, analyze_scalar_evolution (loop, opnd1)); + + if (loop->estimated_nb_iterations == NULL_TREE) + estimate_niter_from_size_of_data (loop, opnd0, access_fn, stmt); VARRAY_PUSH_TREE (*access_fns, access_fn); /* Recursively record other array access functions. */ if (TREE_CODE (opnd0) == ARRAY_REF) - return analyze_array_indexes (loop, access_fns, opnd0); + return analyze_array_indexes (loop, access_fns, opnd0, stmt); /* Return the base name of the data access. */ else @@ -546,7 +594,7 @@ analyze_array (tree stmt, tree ref, bool is_read) DR_REF (res) = ref; VARRAY_TREE_INIT (DR_ACCESS_FNS (res), 3, "access_fns"); DR_BASE_NAME (res) = analyze_array_indexes - (loop_containing_stmt (stmt), &(DR_ACCESS_FNS (res)), ref); + (loop_containing_stmt (stmt), &(DR_ACCESS_FNS (res)), ref, stmt); DR_IS_READ (res) = is_read; if (dump_file && (dump_flags & TDF_DETAILS)) @@ -592,11 +640,31 @@ init_data_ref (tree stmt, -/* When there exists a dependence relation, determine its distance - vector. */ +/* Returns true when all the functions of a tree_vec CHREC are the + same. */ + +static bool +all_chrecs_equal_p (tree chrec) +{ + int j; + + for (j = 0; j < TREE_VEC_LENGTH (chrec) - 1; j++) + { + tree chrec_j = TREE_VEC_ELT (chrec, j); + tree chrec_j_1 = TREE_VEC_ELT (chrec, j + 1); + if (!integer_zerop + (chrec_fold_minus + (integer_type_node, chrec_j, chrec_j_1))) + return false; + } + return true; +} + +/* Determine for each subscript in the data dependence relation DDR + the distance. */ static void -compute_distance_vector (struct data_dependence_relation *ddr) +compute_subscript_distance (struct data_dependence_relation *ddr) { if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) { @@ -610,7 +678,30 @@ compute_distance_vector (struct data_dependence_relation *ddr) subscript = DDR_SUBSCRIPT (ddr, i); conflicts_a = SUB_CONFLICTS_IN_A (subscript); conflicts_b = SUB_CONFLICTS_IN_B (subscript); - difference = chrec_fold_minus + + if (TREE_CODE (conflicts_a) == TREE_VEC) + { + if (!all_chrecs_equal_p (conflicts_a)) + { + SUB_DISTANCE (subscript) = chrec_dont_know; + return; + } + else + conflicts_a = TREE_VEC_ELT (conflicts_a, 0); + } + + if (TREE_CODE (conflicts_b) == TREE_VEC) + { + if (!all_chrecs_equal_p (conflicts_b)) + { + SUB_DISTANCE (subscript) = chrec_dont_know; + return; + } + else + conflicts_b = TREE_VEC_ELT (conflicts_b, 0); + } + + difference = chrec_fold_minus (integer_type_node, conflicts_b, conflicts_a); if (evolution_function_is_constant_p (difference)) @@ -649,8 +740,12 @@ initialize_data_dependence_relation (struct data_reference *a, else { unsigned int i; + DDR_AFFINE_P (res) = true; DDR_ARE_DEPENDENT (res) = NULL_TREE; DDR_SUBSCRIPTS_VECTOR_INIT (res, DR_NUM_DIMENSIONS (a)); + DDR_SIZE_VECT (res) = 0; + DDR_DIST_VECT (res) = NULL; + DDR_DIR_VECT (res) = NULL; for (i = 0; i < DR_NUM_DIMENSIONS (a); i++) { @@ -659,8 +754,7 @@ initialize_data_dependence_relation (struct data_reference *a, subscript = xmalloc (sizeof (struct subscript)); SUB_CONFLICTS_IN_A (subscript) = chrec_dont_know; SUB_CONFLICTS_IN_B (subscript) = chrec_dont_know; - SUB_LAST_CONFLICT_IN_A (subscript) = chrec_dont_know; - SUB_LAST_CONFLICT_IN_B (subscript) = chrec_dont_know; + SUB_LAST_CONFLICT (subscript) = chrec_dont_know; SUB_DISTANCE (subscript) = chrec_dont_know; VARRAY_PUSH_GENERIC_PTR (DDR_SUBSCRIPTS (res), subscript); } @@ -687,6 +781,18 @@ finalize_ddr_dependent (struct data_dependence_relation *ddr, varray_clear (DDR_SUBSCRIPTS (ddr)); } +/* The dependence relation DDR cannot be represented by a distance + vector. */ + +static inline void +non_affine_dependence_relation (struct data_dependence_relation *ddr) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n"); + + DDR_AFFINE_P (ddr) = false; +} + /* This section contains the classic Banerjee tests. */ @@ -750,7 +856,8 @@ static void analyze_ziv_subscript (tree chrec_a, tree chrec_b, tree *overlaps_a, - tree *overlaps_b) + tree *overlaps_b, + tree *last_conflicts) { tree difference; @@ -768,12 +875,14 @@ analyze_ziv_subscript (tree chrec_a, overlaps for each iteration in the loop. */ *overlaps_a = integer_zero_node; *overlaps_b = integer_zero_node; + *last_conflicts = chrec_dont_know; } else { /* The accesses do not overlap. */ *overlaps_a = chrec_known; - *overlaps_b = chrec_known; + *overlaps_b = chrec_known; + *last_conflicts = integer_zero_node; } break; @@ -781,7 +890,8 @@ analyze_ziv_subscript (tree chrec_a, /* We're not sure whether the indexes overlap. For the moment, conservatively answer "don't know". */ *overlaps_a = chrec_dont_know; - *overlaps_b = chrec_dont_know; + *overlaps_b = chrec_dont_know; + *last_conflicts = chrec_dont_know; break; } @@ -801,7 +911,8 @@ static void analyze_siv_subscript_cst_affine (tree chrec_a, tree chrec_b, tree *overlaps_a, - tree *overlaps_b) + tree *overlaps_b, + tree *last_conflicts) { bool value0, value1, value2; tree difference = chrec_fold_minus @@ -811,6 +922,7 @@ analyze_siv_subscript_cst_affine (tree chrec_a, { *overlaps_a = chrec_dont_know; *overlaps_b = chrec_dont_know; + *last_conflicts = chrec_dont_know; return; } else @@ -821,6 +933,7 @@ analyze_siv_subscript_cst_affine (tree chrec_a, { *overlaps_a = chrec_dont_know; *overlaps_b = chrec_dont_know; + *last_conflicts = chrec_dont_know; return; } else @@ -840,6 +953,7 @@ analyze_siv_subscript_cst_affine (tree chrec_a, (build (EXACT_DIV_EXPR, integer_type_node, fold (build1 (ABS_EXPR, integer_type_node, difference)), CHREC_RIGHT (chrec_b))); + *last_conflicts = integer_one_node; return; } @@ -849,6 +963,7 @@ analyze_siv_subscript_cst_affine (tree chrec_a, { *overlaps_a = chrec_known; *overlaps_b = chrec_known; + *last_conflicts = integer_zero_node; return; } } @@ -862,6 +977,7 @@ analyze_siv_subscript_cst_affine (tree chrec_a, In this case, chrec_a will not overlap with chrec_b. */ *overlaps_a = chrec_known; *overlaps_b = chrec_known; + *last_conflicts = integer_zero_node; return; } } @@ -872,6 +988,7 @@ analyze_siv_subscript_cst_affine (tree chrec_a, { *overlaps_a = chrec_dont_know; *overlaps_b = chrec_dont_know; + *last_conflicts = chrec_dont_know; return; } else @@ -889,6 +1006,7 @@ analyze_siv_subscript_cst_affine (tree chrec_a, *overlaps_b = fold (build (EXACT_DIV_EXPR, integer_type_node, difference, CHREC_RIGHT (chrec_b))); + *last_conflicts = integer_one_node; return; } @@ -898,6 +1016,7 @@ analyze_siv_subscript_cst_affine (tree chrec_a, { *overlaps_a = chrec_known; *overlaps_b = chrec_known; + *last_conflicts = integer_zero_node; return; } } @@ -910,6 +1029,7 @@ analyze_siv_subscript_cst_affine (tree chrec_a, In this case, chrec_a will not overlap with chrec_b. */ *overlaps_a = chrec_known; *overlaps_b = chrec_known; + *last_conflicts = integer_zero_node; return; } } @@ -917,21 +1037,193 @@ analyze_siv_subscript_cst_affine (tree chrec_a, } } -/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is an - affine function, and CHREC_B is a constant. *OVERLAPS_A and - *OVERLAPS_B are initialized to the functions that describe the - relation between the elements accessed twice by CHREC_A and - CHREC_B. For k >= 0, the following property is verified: +/* Helper recursive function for intializing the matrix A. Returns + the initial value of CHREC. */ - CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ +static int +initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult) +{ + gcc_assert (chrec); + + if (TREE_CODE (chrec) != POLYNOMIAL_CHREC) + return int_cst_value (chrec); + + A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec)); + return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult); +} + +#define FLOOR_DIV(x,y) ((x) / (y)) + +/* Solves the special case of the Diophantine equation: + | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B) + + Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the + number of iterations that loops X and Y run. The overlaps will be + constructed as evolutions in dimension DIM. */ static void -analyze_siv_subscript_affine_cst (tree chrec_a, - tree chrec_b, - tree *overlaps_a, - tree *overlaps_b) +compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b, + tree *overlaps_a, tree *overlaps_b, + tree *last_conflicts, int dim) +{ + if (((step_a > 0 && step_b > 0) + || (step_a < 0 && step_b < 0))) + { + int step_overlaps_a, step_overlaps_b; + int gcd_steps_a_b, last_conflict, tau2; + + gcd_steps_a_b = gcd (step_a, step_b); + step_overlaps_a = step_b / gcd_steps_a_b; + step_overlaps_b = step_a / gcd_steps_a_b; + + tau2 = FLOOR_DIV (niter, step_overlaps_a); + tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b)); + last_conflict = tau2; + + *overlaps_a = build_polynomial_chrec + (dim, integer_zero_node, + build_int_cst (NULL_TREE, step_overlaps_a)); + *overlaps_b = build_polynomial_chrec + (dim, integer_zero_node, + build_int_cst (NULL_TREE, step_overlaps_b)); + *last_conflicts = build_int_cst (NULL_TREE, last_conflict); + } + + else + { + *overlaps_a = integer_zero_node; + *overlaps_b = integer_zero_node; + *last_conflicts = integer_zero_node; + } +} + + +/* Solves the special case of a Diophantine equation where CHREC_A is + an affine bivariate function, and CHREC_B is an affine univariate + function. For example, + + | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z + + has the following overlapping functions: + + | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v + | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v + | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v + + FORNOW: This is a specialized implementation for a case occuring in + a common benchmark. Implement the general algorithm. */ + +static void +compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b, + tree *overlaps_a, tree *overlaps_b, + tree *last_conflicts) { - analyze_siv_subscript_cst_affine (chrec_b, chrec_a, overlaps_b, overlaps_a); + bool xz_p, yz_p, xyz_p; + int step_x, step_y, step_z; + int niter_x, niter_y, niter_z, niter; + tree numiter_x, numiter_y, numiter_z; + tree overlaps_a_xz, overlaps_b_xz, last_conflicts_xz; + tree overlaps_a_yz, overlaps_b_yz, last_conflicts_yz; + tree overlaps_a_xyz, overlaps_b_xyz, last_conflicts_xyz; + + step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a))); + step_y = int_cst_value (CHREC_RIGHT (chrec_a)); + step_z = int_cst_value (CHREC_RIGHT (chrec_b)); + + numiter_x = number_of_iterations_in_loop + (current_loops->parray[CHREC_VARIABLE (CHREC_LEFT (chrec_a))]); + numiter_y = number_of_iterations_in_loop + (current_loops->parray[CHREC_VARIABLE (chrec_a)]); + numiter_z = number_of_iterations_in_loop + (current_loops->parray[CHREC_VARIABLE (chrec_b)]); + + if (TREE_CODE (numiter_x) != INTEGER_CST) + numiter_x = current_loops->parray[CHREC_VARIABLE (CHREC_LEFT (chrec_a))] + ->estimated_nb_iterations; + if (TREE_CODE (numiter_y) != INTEGER_CST) + numiter_y = current_loops->parray[CHREC_VARIABLE (chrec_a)] + ->estimated_nb_iterations; + if (TREE_CODE (numiter_z) != INTEGER_CST) + numiter_z = current_loops->parray[CHREC_VARIABLE (chrec_b)] + ->estimated_nb_iterations; + + if (numiter_x == NULL_TREE || numiter_y == NULL_TREE + || numiter_z == NULL_TREE) + { + *overlaps_a = chrec_dont_know; + *overlaps_b = chrec_dont_know; + *last_conflicts = chrec_dont_know; + return; + } + + niter_x = int_cst_value (numiter_x); + niter_y = int_cst_value (numiter_y); + niter_z = int_cst_value (numiter_z); + + niter = MIN (niter_x, niter_z); + compute_overlap_steps_for_affine_univar (niter, step_x, step_z, + &overlaps_a_xz, + &overlaps_b_xz, + &last_conflicts_xz, 1); + niter = MIN (niter_y, niter_z); + compute_overlap_steps_for_affine_univar (niter, step_y, step_z, + &overlaps_a_yz, + &overlaps_b_yz, + &last_conflicts_yz, 2); + niter = MIN (niter_x, niter_z); + niter = MIN (niter_y, niter); + compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z, + &overlaps_a_xyz, + &overlaps_b_xyz, + &last_conflicts_xyz, 3); + + xz_p = !integer_zerop (last_conflicts_xz); + yz_p = !integer_zerop (last_conflicts_yz); + xyz_p = !integer_zerop (last_conflicts_xyz); + + if (xz_p || yz_p || xyz_p) + { + *overlaps_a = make_tree_vec (2); + TREE_VEC_ELT (*overlaps_a, 0) = integer_zero_node; + TREE_VEC_ELT (*overlaps_a, 1) = integer_zero_node; + *overlaps_b = integer_zero_node; + if (xz_p) + { + TREE_VEC_ELT (*overlaps_a, 0) = + chrec_fold_plus (integer_type_node, TREE_VEC_ELT (*overlaps_a, 0), + overlaps_a_xz); + *overlaps_b = + chrec_fold_plus (integer_type_node, *overlaps_b, overlaps_b_xz); + *last_conflicts = last_conflicts_xz; + } + if (yz_p) + { + TREE_VEC_ELT (*overlaps_a, 1) = + chrec_fold_plus (integer_type_node, TREE_VEC_ELT (*overlaps_a, 1), + overlaps_a_yz); + *overlaps_b = + chrec_fold_plus (integer_type_node, *overlaps_b, overlaps_b_yz); + *last_conflicts = last_conflicts_yz; + } + if (xyz_p) + { + TREE_VEC_ELT (*overlaps_a, 0) = + chrec_fold_plus (integer_type_node, TREE_VEC_ELT (*overlaps_a, 0), + overlaps_a_xyz); + TREE_VEC_ELT (*overlaps_a, 1) = + chrec_fold_plus (integer_type_node, TREE_VEC_ELT (*overlaps_a, 1), + overlaps_a_xyz); + *overlaps_b = + chrec_fold_plus (integer_type_node, *overlaps_b, overlaps_b_xyz); + *last_conflicts = last_conflicts_xyz; + } + } + else + { + *overlaps_a = integer_zero_node; + *overlaps_b = integer_zero_node; + *last_conflicts = integer_zero_node; + } } /* Determines the overlapping elements due to accesses CHREC_A and @@ -942,10 +1234,14 @@ static void analyze_subscript_affine_affine (tree chrec_a, tree chrec_b, tree *overlaps_a, - tree *overlaps_b) + tree *overlaps_b, + tree *last_conflicts) { - tree left_a, left_b, right_a, right_b; - + unsigned nb_vars_a, nb_vars_b, dim; + int init_a, init_b, gamma, gcd_alpha_beta; + int tau1, tau2; + lambda_matrix A, U, S; + if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "(analyze_subscript_affine_affine \n"); @@ -960,137 +1256,166 @@ analyze_subscript_affine_affine (tree chrec_a, there is no dependence. This function outputs a description of the iterations that hold the intersections. */ - left_a = CHREC_LEFT (chrec_a); - left_b = CHREC_LEFT (chrec_b); - right_a = CHREC_RIGHT (chrec_a); - right_b = CHREC_RIGHT (chrec_b); - if (chrec_zerop (chrec_fold_minus (integer_type_node, left_a, left_b))) - { - /* The first element accessed twice is on the first - iteration. */ - *overlaps_a = build_polynomial_chrec - (CHREC_VARIABLE (chrec_b), integer_zero_node, integer_one_node); - *overlaps_b = build_polynomial_chrec - (CHREC_VARIABLE (chrec_a), integer_zero_node, integer_one_node); - } - - else if (TREE_CODE (left_a) == INTEGER_CST - && TREE_CODE (left_b) == INTEGER_CST - && TREE_CODE (right_a) == INTEGER_CST - && TREE_CODE (right_b) == INTEGER_CST - - /* Both functions should have the same evolution sign. */ - && ((tree_int_cst_sgn (right_a) > 0 - && tree_int_cst_sgn (right_b) > 0) - || (tree_int_cst_sgn (right_a) < 0 - && tree_int_cst_sgn (right_b) < 0))) + nb_vars_a = nb_vars_in_chrec (chrec_a); + nb_vars_b = nb_vars_in_chrec (chrec_b); + + dim = nb_vars_a + nb_vars_b; + U = lambda_matrix_new (dim, dim); + A = lambda_matrix_new (dim, 1); + S = lambda_matrix_new (dim, 1); + + init_a = initialize_matrix_A (A, chrec_a, 0, 1); + init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1); + gamma = init_b - init_a; + + /* Don't do all the hard work of solving the Diophantine equation + when we already know the solution: for example, + | {3, +, 1}_1 + | {3, +, 4}_2 + | gamma = 3 - 3 = 0. + Then the first overlap occurs during the first iterations: + | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x) + */ + if (gamma == 0) { - /* Here we have to solve the Diophantine equation. Reference - book: "Loop Transformations for Restructuring Compilers - The - Foundations" by Utpal Banerjee, pages 59-80. - - ALPHA * X0 = BETA * Y0 + GAMMA. - - with: - ALPHA = RIGHT_A - BETA = RIGHT_B - GAMMA = LEFT_B - LEFT_A - CHREC_A = {LEFT_A, +, RIGHT_A} - CHREC_B = {LEFT_B, +, RIGHT_B} - - The Diophantine equation has a solution if and only if gcd - (ALPHA, BETA) divides GAMMA. This is commonly known under - the name of the "gcd-test". - */ - tree alpha, beta, gamma; - tree la, lb; - tree gcd_alpha_beta; - tree u11, u12, u21, u22; - - /* Both alpha and beta have to be integer_type_node. The gcd - function does not work correctly otherwise. */ - alpha = copy_node (right_a); - beta = copy_node (right_b); - la = copy_node (left_a); - lb = copy_node (left_b); - TREE_TYPE (alpha) = integer_type_node; - TREE_TYPE (beta) = integer_type_node; - TREE_TYPE (la) = integer_type_node; - TREE_TYPE (lb) = integer_type_node; - - gamma = fold (build (MINUS_EXPR, integer_type_node, lb, la)); - - /* FIXME: Use lambda_*_Hermite for implementing Bezout. */ - gcd_alpha_beta = tree_fold_bezout - (alpha, - fold (build (MULT_EXPR, integer_type_node, beta, - integer_minus_one_node)), - &u11, &u12, - &u21, &u22); - - if (dump_file && (dump_flags & TDF_DETAILS)) + if (nb_vars_a == 1 && nb_vars_b == 1) { - fprintf (dump_file, " (alpha = "); - print_generic_expr (dump_file, alpha, 0); - fprintf (dump_file, ")\n (beta = "); - print_generic_expr (dump_file, beta, 0); - fprintf (dump_file, ")\n (gamma = "); - print_generic_expr (dump_file, gamma, 0); - fprintf (dump_file, ")\n (gcd_alpha_beta = "); - print_generic_expr (dump_file, gcd_alpha_beta, 0); - fprintf (dump_file, ")\n"); + int step_a, step_b; + int niter, niter_a, niter_b; + tree numiter_a, numiter_b; + + numiter_a = number_of_iterations_in_loop + (current_loops->parray[CHREC_VARIABLE (chrec_a)]); + numiter_b = number_of_iterations_in_loop + (current_loops->parray[CHREC_VARIABLE (chrec_b)]); + + if (TREE_CODE (numiter_a) != INTEGER_CST) + numiter_a = current_loops->parray[CHREC_VARIABLE (chrec_a)] + ->estimated_nb_iterations; + if (TREE_CODE (numiter_b) != INTEGER_CST) + numiter_b = current_loops->parray[CHREC_VARIABLE (chrec_b)] + ->estimated_nb_iterations; + if (numiter_a == NULL_TREE || numiter_b == NULL_TREE) + { + *overlaps_a = chrec_dont_know; + *overlaps_b = chrec_dont_know; + *last_conflicts = chrec_dont_know; + return; + } + + niter_a = int_cst_value (numiter_a); + niter_b = int_cst_value (numiter_b); + niter = MIN (niter_a, niter_b); + + step_a = int_cst_value (CHREC_RIGHT (chrec_a)); + step_b = int_cst_value (CHREC_RIGHT (chrec_b)); + + compute_overlap_steps_for_affine_univar (niter, step_a, step_b, + overlaps_a, overlaps_b, + last_conflicts, 1); } - - /* The classic "gcd-test". */ - if (!tree_fold_divides_p (integer_type_node, gcd_alpha_beta, gamma)) + + else if (nb_vars_a == 2 && nb_vars_b == 1) + compute_overlap_steps_for_affine_1_2 + (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts); + + else if (nb_vars_a == 1 && nb_vars_b == 2) + compute_overlap_steps_for_affine_1_2 + (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts); + + else { - /* The "gcd-test" has determined that there is no integer - solution, i.e. there is no dependence. */ - *overlaps_a = chrec_known; - *overlaps_b = chrec_known; + *overlaps_a = chrec_dont_know; + *overlaps_b = chrec_dont_know; + *last_conflicts = chrec_dont_know; } - - else + return; + } + + /* U.A = S */ + lambda_matrix_right_hermite (A, dim, 1, S, U); + + if (S[0][0] < 0) + { + S[0][0] *= -1; + lambda_matrix_row_negate (U, dim, 0); + } + gcd_alpha_beta = S[0][0]; + + /* The classic "gcd-test". */ + if (!int_divides_p (gcd_alpha_beta, gamma)) + { + /* The "gcd-test" has determined that there is no integer + solution, i.e. there is no dependence. */ + *overlaps_a = chrec_known; + *overlaps_b = chrec_known; + *last_conflicts = integer_zero_node; + } + + /* Both access functions are univariate. This includes SIV and MIV cases. */ + else if (nb_vars_a == 1 && nb_vars_b == 1) + { + /* Both functions should have the same evolution sign. */ + if (((A[0][0] > 0 && -A[1][0] > 0) + || (A[0][0] < 0 && -A[1][0] < 0))) { /* The solutions are given by: | - | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [X] - | [u21 u22] [Y] - + | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0] + | [u21 u22] [y0] + For a given integer t. Using the following variables, - + | i0 = u11 * gamma / gcd_alpha_beta | j0 = u12 * gamma / gcd_alpha_beta | i1 = u21 | j1 = u22 - + the solutions are: - - | x = i0 + i1 * t, - | y = j0 + j1 * t. */ - - tree i0, j0, i1, j1, t; - tree gamma_gcd; - + + | x0 = i0 + i1 * t, + | y0 = j0 + j1 * t. */ + + int i0, j0, i1, j1; + /* X0 and Y0 are the first iterations for which there is a dependence. X0, Y0 are two solutions of the Diophantine equation: chrec_a (X0) = chrec_b (Y0). */ - tree x0, y0; - - /* Exact div because in this case gcd_alpha_beta divides - gamma. */ - gamma_gcd = fold (build (EXACT_DIV_EXPR, integer_type_node, gamma, - gcd_alpha_beta)); - i0 = fold (build (MULT_EXPR, integer_type_node, u11, gamma_gcd)); - j0 = fold (build (MULT_EXPR, integer_type_node, u12, gamma_gcd)); - i1 = u21; - j1 = u22; - - if ((tree_int_cst_sgn (i1) == 0 - && tree_int_cst_sgn (i0) < 0) - || (tree_int_cst_sgn (j1) == 0 - && tree_int_cst_sgn (j0) < 0)) + int x0, y0; + int niter, niter_a, niter_b; + tree numiter_a, numiter_b; + + numiter_a = number_of_iterations_in_loop + (current_loops->parray[CHREC_VARIABLE (chrec_a)]); + numiter_b = number_of_iterations_in_loop + (current_loops->parray[CHREC_VARIABLE (chrec_b)]); + + if (TREE_CODE (numiter_a) != INTEGER_CST) + numiter_a = current_loops->parray[CHREC_VARIABLE (chrec_a)] + ->estimated_nb_iterations; + if (TREE_CODE (numiter_b) != INTEGER_CST) + numiter_b = current_loops->parray[CHREC_VARIABLE (chrec_b)] + ->estimated_nb_iterations; + if (numiter_a == NULL_TREE || numiter_b == NULL_TREE) + { + *overlaps_a = chrec_dont_know; + *overlaps_b = chrec_dont_know; + *last_conflicts = chrec_dont_know; + return; + } + + niter_a = int_cst_value (numiter_a); + niter_b = int_cst_value (numiter_b); + niter = MIN (niter_a, niter_b); + + i0 = U[0][0] * gamma / gcd_alpha_beta; + j0 = U[0][1] * gamma / gcd_alpha_beta; + i1 = U[1][0]; + j1 = U[1][1]; + + if ((i1 == 0 && i0 < 0) + || (j1 == 0 && j0 < 0)) { /* There is no solution. FIXME: The case "i0 > nb_iterations, j0 > nb_iterations" @@ -1098,42 +1423,36 @@ analyze_subscript_affine_affine (tree chrec_a, upper bound of the iteration domain. */ *overlaps_a = chrec_known; *overlaps_b = chrec_known; - } - + *last_conflicts = integer_zero_node; + } + else { - if (tree_int_cst_sgn (i1) > 0) + if (i1 > 0) { - t = fold - (build (CEIL_DIV_EXPR, integer_type_node, - fold (build (MULT_EXPR, integer_type_node, i0, - integer_minus_one_node)), - i1)); - - if (tree_int_cst_sgn (j1) > 0) + tau1 = CEIL (-i0, i1); + tau2 = FLOOR_DIV (niter - i0, i1); + + if (j1 > 0) { - t = fold - (build (MAX_EXPR, integer_type_node, t, - fold (build (CEIL_DIV_EXPR, integer_type_node, - fold (build - (MULT_EXPR, - integer_type_node, j0, - integer_minus_one_node)), - j1)))); - - x0 = fold - (build (PLUS_EXPR, integer_type_node, i0, - fold (build - (MULT_EXPR, integer_type_node, i1, t)))); - y0 = fold - (build (PLUS_EXPR, integer_type_node, j0, - fold (build - (MULT_EXPR, integer_type_node, j1, t)))); - - *overlaps_a = build_polynomial_chrec - (CHREC_VARIABLE (chrec_b), x0, u21); - *overlaps_b = build_polynomial_chrec - (CHREC_VARIABLE (chrec_a), y0, u22); + int last_conflict; + tau1 = MAX (tau1, CEIL (-j0, j1)); + tau2 = MIN (tau2, FLOOR_DIV (niter - j0, j1)); + + x0 = (i1 * tau1 + i0) % i1; + y0 = (j1 * tau1 + j0) % j1; + tau1 = (x0 - i0)/i1; + last_conflict = tau2 - tau1; + + *overlaps_a = build_polynomial_chrec + (1, + build_int_cst (NULL_TREE, x0), + build_int_cst (NULL_TREE, i1)); + *overlaps_b = build_polynomial_chrec + (1, + build_int_cst (NULL_TREE, y0), + build_int_cst (NULL_TREE, j1)); + *last_conflicts = build_int_cst (NULL_TREE, last_conflict); } else { @@ -1141,27 +1460,36 @@ analyze_subscript_affine_affine (tree chrec_a, iteration domain for j is not checked. */ *overlaps_a = chrec_dont_know; *overlaps_b = chrec_dont_know; + *last_conflicts = chrec_dont_know; } } - + else { /* FIXME: For the moment, the upper bound of the iteration domain for i is not checked. */ *overlaps_a = chrec_dont_know; *overlaps_b = chrec_dont_know; + *last_conflicts = chrec_dont_know; } } } + else + { + *overlaps_a = chrec_dont_know; + *overlaps_b = chrec_dont_know; + *last_conflicts = chrec_dont_know; + } } - + else { - /* For the moment, "don't know". */ *overlaps_a = chrec_dont_know; *overlaps_b = chrec_dont_know; + *last_conflicts = chrec_dont_know; } - + + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, " (overlaps_a = "); @@ -1186,7 +1514,8 @@ static void analyze_siv_subscript (tree chrec_a, tree chrec_b, tree *overlaps_a, - tree *overlaps_b) + tree *overlaps_b, + tree *last_conflicts) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "(analyze_siv_subscript \n"); @@ -1194,22 +1523,22 @@ analyze_siv_subscript (tree chrec_a, if (evolution_function_is_constant_p (chrec_a) && evolution_function_is_affine_p (chrec_b)) analyze_siv_subscript_cst_affine (chrec_a, chrec_b, - overlaps_a, overlaps_b); + overlaps_a, overlaps_b, last_conflicts); else if (evolution_function_is_affine_p (chrec_a) && evolution_function_is_constant_p (chrec_b)) - analyze_siv_subscript_affine_cst (chrec_a, chrec_b, - overlaps_a, overlaps_b); + analyze_siv_subscript_cst_affine (chrec_b, chrec_a, + overlaps_b, overlaps_a, last_conflicts); else if (evolution_function_is_affine_p (chrec_a) - && evolution_function_is_affine_p (chrec_b) - && (CHREC_VARIABLE (chrec_a) == CHREC_VARIABLE (chrec_b))) + && evolution_function_is_affine_p (chrec_b)) analyze_subscript_affine_affine (chrec_a, chrec_b, - overlaps_a, overlaps_b); + overlaps_a, overlaps_b, last_conflicts); else { *overlaps_a = chrec_dont_know; *overlaps_b = chrec_dont_know; + *last_conflicts = chrec_dont_know; } if (dump_file && (dump_flags & TDF_DETAILS)) @@ -1246,7 +1575,8 @@ static void analyze_miv_subscript (tree chrec_a, tree chrec_b, tree *overlaps_a, - tree *overlaps_b) + tree *overlaps_b, + tree *last_conflicts) { /* FIXME: This is a MIV subscript, not yet handled. Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from @@ -1269,6 +1599,8 @@ analyze_miv_subscript (tree chrec_a, in the same order. */ *overlaps_a = integer_zero_node; *overlaps_b = integer_zero_node; + *last_conflicts = number_of_iterations_in_loop + (current_loops->parray[CHREC_VARIABLE (chrec_a)]); } else if (evolution_function_is_constant_p (difference) @@ -1283,25 +1615,28 @@ analyze_miv_subscript (tree chrec_a, consequently there are no overlapping elements. */ *overlaps_a = chrec_known; *overlaps_b = chrec_known; + *last_conflicts = integer_zero_node; } - else if (evolution_function_is_univariate_p (chrec_a) - && evolution_function_is_univariate_p (chrec_b)) + else if (evolution_function_is_affine_multivariate_p (chrec_a) + && evolution_function_is_affine_multivariate_p (chrec_b)) { /* testsuite/.../ssa-chrec-35.c {0, +, 1}_2 vs. {0, +, 1}_3 the overlapping elements are respectively located at iterations: - {0, +, 1}_3 and {0, +, 1}_2. + {0, +, 1}_x and {0, +, 1}_x, + in other words, we have the equality: + {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x) + + Other examples: + {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) = + {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y) + + {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) = + {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) */ - if (evolution_function_is_affine_p (chrec_a) - && evolution_function_is_affine_p (chrec_b)) - analyze_subscript_affine_affine (chrec_a, chrec_b, - overlaps_a, overlaps_b); - else - { - *overlaps_a = chrec_dont_know; - *overlaps_b = chrec_dont_know; - } + analyze_subscript_affine_affine (chrec_a, chrec_b, + overlaps_a, overlaps_b, last_conflicts); } else @@ -1309,6 +1644,7 @@ analyze_miv_subscript (tree chrec_a, /* When the analysis is too difficult, answer "don't know". */ *overlaps_a = chrec_dont_know; *overlaps_b = chrec_dont_know; + *last_conflicts = chrec_dont_know; } if (dump_file && (dump_flags & TDF_DETAILS)) @@ -1329,7 +1665,8 @@ static void analyze_overlapping_iterations (tree chrec_a, tree chrec_b, tree *overlap_iterations_a, - tree *overlap_iterations_b) + tree *overlap_iterations_b, + tree *last_conflicts) { if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -1354,15 +1691,18 @@ analyze_overlapping_iterations (tree chrec_a, else if (ziv_subscript_p (chrec_a, chrec_b)) analyze_ziv_subscript (chrec_a, chrec_b, - overlap_iterations_a, overlap_iterations_b); + overlap_iterations_a, overlap_iterations_b, + last_conflicts); else if (siv_subscript_p (chrec_a, chrec_b)) analyze_siv_subscript (chrec_a, chrec_b, - overlap_iterations_a, overlap_iterations_b); + overlap_iterations_a, overlap_iterations_b, + last_conflicts); else analyze_miv_subscript (chrec_a, chrec_b, - overlap_iterations_a, overlap_iterations_b); + overlap_iterations_a, overlap_iterations_b, + last_conflicts); if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -1386,6 +1726,7 @@ subscript_dependence_tester (struct data_dependence_relation *ddr) unsigned int i; struct data_reference *dra = DDR_A (ddr); struct data_reference *drb = DDR_B (ddr); + tree last_conflicts; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "(subscript_dependence_tester \n"); @@ -1397,7 +1738,8 @@ subscript_dependence_tester (struct data_dependence_relation *ddr) analyze_overlapping_iterations (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i), - &overlaps_a, &overlaps_b); + &overlaps_a, &overlaps_b, + &last_conflicts); if (chrec_contains_undetermined (overlaps_a) || chrec_contains_undetermined (overlaps_b)) @@ -1417,6 +1759,7 @@ subscript_dependence_tester (struct data_dependence_relation *ddr) { SUB_CONFLICTS_IN_A (subscript) = overlaps_a; SUB_CONFLICTS_IN_B (subscript) = overlaps_b; + SUB_LAST_CONFLICT (subscript) = last_conflicts; } } @@ -1428,7 +1771,8 @@ subscript_dependence_tester (struct data_dependence_relation *ddr) DDR is the data dependence relation to build a vector from. NB_LOOPS is the total number of loops we are considering. - FIRST_LOOP is the loop->num of the first loop. */ + FIRST_LOOP is the loop->num of the first loop in the analyzed + loop nest. */ static void build_classic_dist_vector (struct data_dependence_relation *ddr, @@ -1447,22 +1791,68 @@ build_classic_dist_vector (struct data_dependence_relation *ddr, for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) { + tree access_fn_a, access_fn_b; struct subscript *subscript = DDR_SUBSCRIPT (ddr, i); if (chrec_contains_undetermined (SUB_DISTANCE (subscript))) - return; + { + non_affine_dependence_relation (ddr); + return; + } + + access_fn_a = DR_ACCESS_FN (DDR_A (ddr), i); + access_fn_b = DR_ACCESS_FN (DDR_B (ddr), i); - if (TREE_CODE (SUB_CONFLICTS_IN_A (subscript)) == POLYNOMIAL_CHREC) + if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC + && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC) { - int dist; - int loop_nb; - loop_nb = CHREC_VARIABLE (SUB_CONFLICTS_IN_A (subscript)); + int dist, loop_nb; + int loop_nb_a = CHREC_VARIABLE (access_fn_a); + int loop_nb_b = CHREC_VARIABLE (access_fn_b); + struct loop *loop_a = current_loops->parray[loop_nb_a]; + struct loop *loop_b = current_loops->parray[loop_nb_b]; + + if (loop_nb_a != loop_nb_b + && !flow_loop_nested_p (loop_a, loop_b) + && !flow_loop_nested_p (loop_b, loop_a)) + { + /* Example: when there are two consecutive loops, + + | loop_1 + | A[{0, +, 1}_1] + | endloop_1 + | loop_2 + | A[{0, +, 1}_2] + | endloop_2 + + the dependence relation cannot be captured by the + distance abstraction. */ + non_affine_dependence_relation (ddr); + return; + } + + /* The dependence is carried by the outermost loop. Example: + | loop_1 + | A[{4, +, 1}_1] + | loop_2 + | A[{5, +, 1}_2] + | endloop_2 + | endloop_1 + In this case, the dependence is carried by loop_1. */ + loop_nb = loop_nb_a < loop_nb_b ? loop_nb_a : loop_nb_b; loop_nb -= first_loop; + /* If the loop number is still greater than the number of loops we've been asked to analyze, or negative, something is borked. */ gcc_assert (loop_nb >= 0); gcc_assert (loop_nb < nb_loops); + if (chrec_contains_undetermined (SUB_DISTANCE (subscript))) + { + non_affine_dependence_relation (ddr); + return; + } + dist = int_cst_value (SUB_DISTANCE (subscript)); /* This is the subscript coupling test. @@ -1505,6 +1895,7 @@ build_classic_dist_vector (struct data_dependence_relation *ddr, lca_nb -= first_loop; gcc_assert (lca_nb >= 0); gcc_assert (lca_nb < nb_loops); + /* For each outer loop where init_v is not set, the accesses are in dependence of distance 1 in the loop. */ if (lca != loop_a @@ -1519,8 +1910,13 @@ build_classic_dist_vector (struct data_dependence_relation *ddr, lca_nb = lca->num - first_loop; while (lca->depth != 0) { - gcc_assert (lca_nb >= 0); + /* If we're considering just a sub-nest, then don't record + any information on the outer loops. */ + if (lca_nb < 0) + break; + gcc_assert (lca_nb < nb_loops); + if (init_v[lca_nb] == 0) dist_v[lca_nb] = 1; lca = lca->outer; @@ -1531,13 +1927,15 @@ build_classic_dist_vector (struct data_dependence_relation *ddr, } DDR_DIST_VECT (ddr) = dist_v; + DDR_SIZE_VECT (ddr) = nb_loops; } /* Compute the classic per loop direction vector. DDR is the data dependence relation to build a vector from. NB_LOOPS is the total number of loops we are considering. - FIRST_LOOP is the loop->num of the first loop. */ + FIRST_LOOP is the loop->num of the first loop in the analyzed + loop nest. */ static void build_classic_dir_vector (struct data_dependence_relation *ddr, @@ -1556,15 +1954,55 @@ build_classic_dir_vector (struct data_dependence_relation *ddr, for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) { + tree access_fn_a, access_fn_b; struct subscript *subscript = DDR_SUBSCRIPT (ddr, i); - if (TREE_CODE (SUB_CONFLICTS_IN_A (subscript)) == POLYNOMIAL_CHREC - && TREE_CODE (SUB_CONFLICTS_IN_B (subscript)) == POLYNOMIAL_CHREC) + if (chrec_contains_undetermined (SUB_DISTANCE (subscript))) { - int loop_nb; - + non_affine_dependence_relation (ddr); + return; + } + + access_fn_a = DR_ACCESS_FN (DDR_A (ddr), i); + access_fn_b = DR_ACCESS_FN (DDR_B (ddr), i); + if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC + && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC) + { + int dist, loop_nb; enum data_dependence_direction dir = dir_star; - loop_nb = CHREC_VARIABLE (SUB_CONFLICTS_IN_A (subscript)); + int loop_nb_a = CHREC_VARIABLE (access_fn_a); + int loop_nb_b = CHREC_VARIABLE (access_fn_b); + struct loop *loop_a = current_loops->parray[loop_nb_a]; + struct loop *loop_b = current_loops->parray[loop_nb_b]; + + if (loop_nb_a != loop_nb_b + && !flow_loop_nested_p (loop_a, loop_b) + && !flow_loop_nested_p (loop_b, loop_a)) + { + /* Example: when there are two consecutive loops, + + | loop_1 + | A[{0, +, 1}_1] + | endloop_1 + | loop_2 + | A[{0, +, 1}_2] + | endloop_2 + + the dependence relation cannot be captured by the + distance abstraction. */ + non_affine_dependence_relation (ddr); + return; + } + + /* The dependence is carried by the outermost loop. Example: + | loop_1 + | A[{4, +, 1}_1] + | loop_2 + | A[{5, +, 1}_2] + | endloop_2 + | endloop_1 + In this case, the dependence is carried by loop_1. */ + loop_nb = loop_nb_a < loop_nb_b ? loop_nb_a : loop_nb_b; loop_nb -= first_loop; /* If the loop number is still greater than the number of @@ -1572,17 +2010,21 @@ build_classic_dir_vector (struct data_dependence_relation *ddr, something is borked. */ gcc_assert (loop_nb >= 0); gcc_assert (loop_nb < nb_loops); - if (!chrec_contains_undetermined (SUB_DISTANCE (subscript))) + + if (chrec_contains_undetermined (SUB_DISTANCE (subscript))) { - int dist = int_cst_value (SUB_DISTANCE (subscript)); - - if (dist == 0) - dir = dir_equal; - else if (dist > 0) - dir = dir_positive; - else if (dist < 0) - dir = dir_negative; + non_affine_dependence_relation (ddr); + return; } + + dist = int_cst_value (SUB_DISTANCE (subscript)); + + if (dist == 0) + dir = dir_equal; + else if (dist > 0) + dir = dir_positive; + else if (dist < 0) + dir = dir_negative; /* This is the subscript coupling test. | loop i = 0, N, 1 @@ -1625,6 +2067,7 @@ build_classic_dir_vector (struct data_dependence_relation *ddr, gcc_assert (lca_nb >= 0); gcc_assert (lca_nb < nb_loops); + /* For each outer loop where init_v is not set, the accesses are in dependence of distance 1 in the loop. */ if (lca != loop_a @@ -1638,8 +2081,13 @@ build_classic_dir_vector (struct data_dependence_relation *ddr, lca_nb = lca->num - first_loop; while (lca->depth != 0) { - gcc_assert (lca_nb >= 0); + /* If we're considering just a sub-nest, then don't record + any information on the outer loops. */ + if (lca_nb < 0) + break; + gcc_assert (lca_nb < nb_loops); + if (init_v[lca_nb] == 0) dir_v[lca_nb] = dir_positive; lca = lca->outer; @@ -1650,6 +2098,7 @@ build_classic_dir_vector (struct data_dependence_relation *ddr, } DDR_DIR_VECT (ddr) = dir_v; + DDR_SIZE_VECT (ddr) = nb_loops; } /* Returns true when all the access functions of A are affine or @@ -1734,12 +2183,11 @@ compute_all_dependences (varray_type datarefs, a = VARRAY_GENERIC_PTR (datarefs, i); b = VARRAY_GENERIC_PTR (datarefs, j); - ddr = initialize_data_dependence_relation (a, b); VARRAY_PUSH_GENERIC_PTR (*dependence_relations, ddr); compute_affine_dependence (ddr); - compute_distance_vector (ddr); + compute_subscript_distance (ddr); } } @@ -1752,12 +2200,13 @@ compute_all_dependences (varray_type datarefs, acceptable for the moment, since this function is used only for debugging purposes. */ -static tree +tree find_data_references_in_loop (struct loop *loop, varray_type *datarefs) { + bool dont_know_node_not_inserted = true; basic_block bb; block_stmt_iterator bsi; - + FOR_EACH_BB (bb) { if (!flow_bb_inside_loop_p (loop, bb)) @@ -1789,11 +2238,33 @@ find_data_references_in_loop (struct loop *loop, varray_type *datarefs) true)); else - return chrec_dont_know; + { + if (dont_know_node_not_inserted) + { + struct data_reference *res; + res = xmalloc (sizeof (struct data_reference)); + DR_STMT (res) = NULL_TREE; + DR_REF (res) = NULL_TREE; + DR_ACCESS_FNS (res) = NULL; + DR_BASE_NAME (res) = NULL; + DR_IS_READ (res) = false; + VARRAY_PUSH_GENERIC_PTR (*datarefs, res); + + dont_know_node_not_inserted = false; + } + } + + /* When there are no defs in the loop, the loop is parallel. */ + if (NUM_V_MAY_DEFS (STMT_V_MAY_DEF_OPS (stmt)) > 0 + || NUM_V_MUST_DEFS (STMT_V_MUST_DEF_OPS (stmt)) > 0) + bb->loop_father->parallel_p = false; } + + if (bb->loop_father->estimated_nb_iterations == NULL_TREE) + compute_estimated_nb_iterations (bb->loop_father); } - return NULL_TREE; + return dont_know_node_not_inserted ? NULL_TREE : chrec_dont_know; } @@ -1881,63 +2352,44 @@ analyze_all_data_dependences (struct loops *loops) { dump_data_dependence_relations (dump_file, dependence_relations); fprintf (dump_file, "\n\n"); - } - /* Don't dump distances in order to avoid to update the - testsuite. */ - if (dump_file && (dump_flags & TDF_DETAILS)) - { - for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++) - { - struct data_dependence_relation *ddr = - (struct data_dependence_relation *) - VARRAY_GENERIC_PTR (dependence_relations, i); - if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) - { - fprintf (dump_file, "DISTANCE_V ("); - print_lambda_vector (dump_file, DDR_DIST_VECT (ddr), loops->num); - fprintf (dump_file, ")\n"); - fprintf (dump_file, "DIRECTION_V ("); - print_lambda_vector (dump_file, DDR_DIR_VECT (ddr), loops->num); - fprintf (dump_file, ")\n"); - } - } - fprintf (dump_file, "\n\n"); - } - - if (dump_file && (dump_flags & TDF_STATS)) - { - unsigned nb_top_relations = 0; - unsigned nb_bot_relations = 0; - unsigned nb_basename_differ = 0; - unsigned nb_chrec_relations = 0; + if (dump_flags & TDF_DETAILS) + dump_dist_dir_vectors (dump_file, dependence_relations); - for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++) + if (dump_flags & TDF_STATS) { - struct data_dependence_relation *ddr; - ddr = VARRAY_GENERIC_PTR (dependence_relations, i); + unsigned nb_top_relations = 0; + unsigned nb_bot_relations = 0; + unsigned nb_basename_differ = 0; + unsigned nb_chrec_relations = 0; + + for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++) + { + struct data_dependence_relation *ddr; + ddr = VARRAY_GENERIC_PTR (dependence_relations, i); - if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr))) - nb_top_relations++; + if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr))) + nb_top_relations++; - else if (DDR_ARE_DEPENDENT (ddr) == chrec_known) - { - struct data_reference *a = DDR_A (ddr); - struct data_reference *b = DDR_B (ddr); - bool differ_p; + else if (DDR_ARE_DEPENDENT (ddr) == chrec_known) + { + struct data_reference *a = DDR_A (ddr); + struct data_reference *b = DDR_B (ddr); + bool differ_p; - if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b) - || (array_base_name_differ_p (a, b, &differ_p) && differ_p)) - nb_basename_differ++; - else - nb_bot_relations++; - } + if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b) + || (array_base_name_differ_p (a, b, &differ_p) && differ_p)) + nb_basename_differ++; + else + nb_bot_relations++; + } - else - nb_chrec_relations++; - } + else + nb_chrec_relations++; + } - gather_stats_on_scev_database (); + gather_stats_on_scev_database (); + } } free_dependence_relations (dependence_relations); @@ -1991,3 +2443,4 @@ free_data_refs (varray_type datarefs) } varray_clear (datarefs); } + diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h index 8caafa4..ea3bb7d 100644 --- a/gcc/tree-data-ref.h +++ b/gcc/tree-data-ref.h @@ -79,10 +79,9 @@ struct subscript tree conflicting_iterations_in_a; tree conflicting_iterations_in_b; - /* These fields store the information about the iteration domain + /* This field stores the information about the iteration domain validity of the dependence relation. */ - tree last_conflict_in_a; - tree last_conflict_in_b; + tree last_conflict; /* Distance from the iteration that access a conflicting element in A to the iteration that access this same conflicting element in @@ -93,8 +92,7 @@ struct subscript #define SUB_CONFLICTS_IN_A(SUB) SUB->conflicting_iterations_in_a #define SUB_CONFLICTS_IN_B(SUB) SUB->conflicting_iterations_in_b -#define SUB_LAST_CONFLICT_IN_A(SUB) SUB->last_conflict_in_a -#define SUB_LAST_CONFLICT_IN_B(SUB) SUB->last_conflict_in_b +#define SUB_LAST_CONFLICT(SUB) SUB->last_conflict #define SUB_DISTANCE(SUB) SUB->distance /* A data_dependence_relation represents a relation between two @@ -106,6 +104,10 @@ struct data_dependence_relation struct data_reference *a; struct data_reference *b; + /* When the dependence relation is affine, it can be represented by + a distance vector. */ + bool affine_p; + /* A "yes/no/maybe" field for the dependence relation: - when "ARE_DEPENDENT == NULL_TREE", there exist a dependence @@ -124,6 +126,9 @@ struct data_dependence_relation the data_dependence_relation. */ varray_type subscripts; + /* The size of the direction/distance vectors. */ + int size_vect; + /* The classic direction vector. */ lambda_vector dir_vect; @@ -133,26 +138,32 @@ struct data_dependence_relation #define DDR_A(DDR) DDR->a #define DDR_B(DDR) DDR->b +#define DDR_AFFINE_P(DDR) DDR->affine_p #define DDR_ARE_DEPENDENT(DDR) DDR->are_dependent #define DDR_SUBSCRIPTS(DDR) DDR->subscripts #define DDR_SUBSCRIPTS_VECTOR_INIT(DDR, N) \ VARRAY_GENERIC_PTR_INIT (DDR_SUBSCRIPTS (DDR), N, "subscripts_vector"); #define DDR_SUBSCRIPT(DDR, I) VARRAY_GENERIC_PTR (DDR_SUBSCRIPTS (DDR), I) #define DDR_NUM_SUBSCRIPTS(DDR) VARRAY_ACTIVE_SIZE (DDR_SUBSCRIPTS (DDR)) +#define DDR_SIZE_VECT(DDR) DDR->size_vect #define DDR_DIR_VECT(DDR) DDR->dir_vect #define DDR_DIST_VECT(DDR) DDR->dist_vect -struct data_dependence_relation *initialize_data_dependence_relation +extern tree find_data_references_in_loop (struct loop *, varray_type *); +extern struct data_dependence_relation *initialize_data_dependence_relation (struct data_reference *, struct data_reference *); -void compute_affine_dependence (struct data_dependence_relation *); +extern void compute_affine_dependence (struct data_dependence_relation *); extern void analyze_all_data_dependences (struct loops *); extern void compute_data_dependences_for_loop (unsigned, struct loop *, varray_type *, varray_type *); extern struct data_reference * init_data_ref (tree, tree, tree, tree, bool); extern struct data_reference *analyze_array (tree, tree, bool); +extern void dump_subscript (FILE *, struct subscript *); +extern void dump_ddrs (FILE *, varray_type); +extern void dump_dist_dir_vectors (FILE *, varray_type); extern void dump_data_reference (FILE *, struct data_reference *); extern void dump_data_references (FILE *, varray_type); extern void dump_data_dependence_relation (FILE *, @@ -161,11 +172,12 @@ extern void dump_data_dependence_relations (FILE *, varray_type); extern void dump_data_dependence_direction (FILE *, enum data_dependence_direction); extern bool array_base_name_differ_p (struct data_reference *, - struct data_reference *, bool *p); + struct data_reference *, bool *); extern void free_dependence_relation (struct data_dependence_relation *); extern void free_dependence_relations (varray_type); extern void free_data_refs (varray_type); + #endif /* GCC_TREE_DATA_REF_H */ diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index 6eb44e1..0e2b870 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -36,6 +36,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "ggc.h" #include "tree-chrec.h" #include "tree-scalar-evolution.h" +#include "tree-data-ref.h" #include "params.h" #include "flags.h" #include "tree-inline.h" @@ -943,24 +944,10 @@ find_loop_niter_by_eval (struct loop *loop, edge *exit) */ -/* The structure describing a bound on number of iterations of a loop. */ - -struct nb_iter_bound -{ - tree bound; /* The expression whose value is an upper bound on the - number of executions of anything after ... */ - tree at_stmt; /* ... this statement during one execution of loop. */ - tree additional; /* A conjunction of conditions the operands of BOUND - satisfy. The additional information about the value - of the bound may be derived from it. */ - struct nb_iter_bound *next; - /* The next bound in a list. */ -}; - /* Records that AT_STMT is executed at most BOUND times in LOOP. The additional condition ADDITIONAL is recorded with the bound. */ -static void +void record_estimate (struct loop *loop, tree bound, tree additional, tree at_stmt) { struct nb_iter_bound *elt = xmalloc (sizeof (struct nb_iter_bound)); @@ -1010,8 +997,14 @@ estimate_numbers_of_iterations_loop (struct loop *loop) } free (exits); - /* TODO Here we could use other possibilities, like bounds of arrays accessed - in the loop. */ + /* Analyzes the bounds of arrays accessed in the loop. */ + if (loop->estimated_nb_iterations == NULL_TREE) + { + varray_type datarefs; + VARRAY_GENERIC_PTR_INIT (datarefs, 3, "datarefs"); + find_data_references_in_loop (loop, &datarefs); + free_data_refs (datarefs); + } } /* Records estimates on numbers of iterations of LOOPS. */ |