diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 12 | ||||
-rw-r--r-- | gcc/lambda-code.c | 22 | ||||
-rw-r--r-- | gcc/lambda.h | 42 | ||||
-rw-r--r-- | gcc/tree-loop-linear.c | 29 |
4 files changed, 80 insertions, 25 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0b23912..0128360 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2004-11-02 Daniel Berlin <dberlin@dberlin.org> + + * lambda-code.c (lambda_compute_auxillary_space): Update comments. + (lambda_compute_target_space). Ditto. + * lambda.h (lambda_trans_matrix): Ditto. + (lambda_linear_expression): Ditto. + (lambda_body_vector): Ditto. + (lambda_loopnest): Ditto. + * tree-loop-linear.c (gather_interchange_stats): Combine tests, + update comments, and remove pointless addition of 0. + (linear_transform_loops): Update comments. + 2004-11-03 Sebastian Pop <pop@cri.ensmp.fr> * tree.c (tree_fold_gcd): Use FLOOR_MOD_EXPR instead of diff --git a/gcc/lambda-code.c b/gcc/lambda-code.c index d564f43..0d066b9 100644 --- a/gcc/lambda-code.c +++ b/gcc/lambda-code.c @@ -651,7 +651,19 @@ compute_nest_using_fourier_motzkin (int size, } /* Compute the loop bounds for the auxiliary space NEST. - Input system used is Ax <= b. TRANS is the unimodular transformation. */ + Input system used is Ax <= b. TRANS is the unimodular transformation. + Given the original nest, this function will + 1. Convert the nest into matrix form, which consists of a matrix for the + coefficients, a matrix for the + invariant coefficients, and a vector for the constants. + 2. Use the matrix form to calculate the lattice base for the nest (which is + a dense space) + 3. Compose the dense space transform with the user specified transform, to + get a transform we can easily calculate transformed bounds for. + 4. Multiply the composed transformation matrix times the matrix form of the + loop. + 5. Transform the newly created matrix (from step 4) back into a loop nest + using fourier motzkin elimination to figure out the bounds. */ static lambda_loopnest lambda_compute_auxillary_space (lambda_loopnest nest, @@ -786,9 +798,11 @@ lambda_compute_auxillary_space (lambda_loopnest nest, } /* Compute the loop bounds for the target space, using the bounds of - the auxiliary nest AUXILLARY_NEST, and the triangular matrix H. This is - done by matrix multiplication and then transformation of the new matrix - back into linear expression form. + the auxiliary nest AUXILLARY_NEST, and the triangular matrix H. + The target space loop bounds are computed by multiplying the triangular + matrix H by the auxillary nest, to get the new loop bounds. The sign of + the loop steps (positive or negative) is then used to swap the bounds if + the loop counts downwards. Return the target loopnest. */ static lambda_loopnest diff --git a/gcc/lambda.h b/gcc/lambda.h index b736024..98fe6bd 100644 --- a/gcc/lambda.h +++ b/gcc/lambda.h @@ -29,11 +29,14 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA and scalar multiplication. In this vector space, an element is a list of integers. */ typedef int *lambda_vector; + /* An integer matrix. A matrix consists of m vectors of length n (IE all vectors are the same length). */ typedef lambda_vector *lambda_matrix; -/* A transformation matrix. */ +/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE + matrix. Rather than use floats, we simply keep a single DENOMINATOR that + represents the denominator for every element in the matrix. */ typedef struct { lambda_matrix matrix; @@ -46,7 +49,15 @@ typedef struct #define LTM_COLSIZE(T) ((T)->colsize) #define LTM_DENOMINATOR(T) ((T)->denominator) -/* A vector representing a statement in the body of a loop. */ +/* A vector representing a statement in the body of a loop. + The COEFFICIENTS vector contains a coefficient for each induction variable + in the loop nest containing the statement. + The DENOMINATOR represents the denominator for each coefficient in the + COEFFICIENT vector. + + This structure is used during code generation in order to rewrite the old + induction variable uses in a statement in terms of the newly created + induction variables. */ typedef struct { lambda_vector coefficients; @@ -57,7 +68,18 @@ typedef struct #define LBV_SIZE(T) ((T)->size) #define LBV_DENOMINATOR(T) ((T)->denominator) -/* Piecewise linear expression. */ +/* Piecewise linear expression. + This structure represents a linear expression with terms for the invariants + and induction variables of a loop. + COEFFICIENTS is a vector of coefficients for the induction variables, one + per loop in the loop nest. + CONSTANT is the constant portion of the linear expression + INVARIANT_COEFFICIENTS is a vector of coefficients for the loop invariants, + one per invariant. + DENOMINATOR is the denominator for all of the coefficients and constants in + the expression. + The linear expressions can be linked together using the NEXT field, in + order to represent MAX or MIN of a group of linear expressions. */ typedef struct lambda_linear_expression_s { lambda_vector coefficients; @@ -77,7 +99,12 @@ lambda_linear_expression lambda_linear_expression_new (int, int); void print_lambda_linear_expression (FILE *, lambda_linear_expression, int, int, char); -/* Loop structure. */ +/* Loop structure. Our loop structure consists of a constant representing the + STEP of the loop, a set of linear expressions representing the LOWER_BOUND + of the loop, a set of linear expressions representing the UPPER_BOUND of + the loop, and a set of linear expressions representing the LINEAR_OFFSET of + the loop. The linear offset is a set of linear expressions that are + applied to *both* the lower bound, and the upper bound. */ typedef struct lambda_loop_s { lambda_linear_expression lower_bound; @@ -91,7 +118,12 @@ typedef struct lambda_loop_s #define LL_LINEAR_OFFSET(T) ((T)->linear_offset) #define LL_STEP(T) ((T)->step) -/* Loop nest structure. */ +/* Loop nest structure. + The loop nest structure consists of a set of loop structures (defined + above) in LOOPS, along with an integer representing the DEPTH of the loop, + and an integer representing the number of INVARIANTS in the loop. Both of + these integers are used to size the associated coefficient vectors in the + linear expression structures. */ typedef struct { lambda_loop *loops; diff --git a/gcc/tree-loop-linear.c b/gcc/tree-loop-linear.c index 07afe5d..fcb93ea 100644 --- a/gcc/tree-loop-linear.c +++ b/gcc/tree-loop-linear.c @@ -88,7 +88,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA DEPENDENCE_STEPS = 3000 NB_DEPS_NOT_CARRIED_BY_LOOP = 7 ACCESS_STRIDES = 8010 - */ +*/ static void gather_interchange_stats (varray_type dependence_relations, @@ -112,21 +112,17 @@ gather_interchange_stats (varray_type dependence_relations, (struct data_dependence_relation *) VARRAY_GENERIC_PTR (dependence_relations, i); - /* Compute the dependence strides. */ + /* If we don't know anything about this dependence, or the distance + vector is NULL, or there is no dependence, then there is no reuse of + data. */ - if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) - { - (*dependence_steps) += 0; - continue; - } + if (DDR_DIST_VECT (ddr) == NULL + || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know + || DDR_ARE_DEPENDENT (ddr) == chrec_known) + continue; + - /* When we know that there is no dependence, we know that there - is no reuse of the data. */ - if (DDR_ARE_DEPENDENT (ddr) == chrec_known) - { - (*dependence_steps) += 0; - continue; - } + dist = DDR_DIST_VECT (ddr)[loop_number]; if (dist == 0) (*nb_deps_not_carried_by_loop) += 1; @@ -164,7 +160,8 @@ gather_interchange_stats (varray_type dependence_relations, } } -/* Apply to TRANS any loop interchange that minimize inner loop steps. +/* Attempt to apply interchange transformations to TRANS to maximize the + spatial and temporal locality of the loop. Returns the new transform matrix. The smaller the reuse vector distances in the inner loops, the fewer the cache misses. FIRST_LOOP is the loop->num of the first loop in the analyzed loop @@ -238,7 +235,7 @@ void linear_transform_loops (struct loops *loops) { unsigned int i; - + compute_immediate_uses (TDFA_USE_OPS | TDFA_USE_VOPS, NULL); for (i = 1; i < loops->num; i++) { |