4 files changed, 80 insertions, 25 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0b23912..0128360 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2004-11-02  Daniel Berlin  <dberlin@dberlin.org>
+
+	* lambda-code.c (lambda_compute_auxillary_space): Update comments.
+	(lambda_compute_target_space). Ditto.
+	* lambda.h (lambda_trans_matrix): Ditto.
+	(lambda_linear_expression): Ditto.
+	(lambda_body_vector): Ditto.
+	(lambda_loopnest): Ditto.
+	* tree-loop-linear.c (gather_interchange_stats): Combine tests,
+	update comments, and remove pointless addition of 0.
+	(linear_transform_loops): Update comments.
+
 2004-11-03  Sebastian Pop  <pop@cri.ensmp.fr>
 
 	* tree.c (tree_fold_gcd): Use FLOOR_MOD_EXPR instead of
diff --git a/gcc/lambda-code.c b/gcc/lambda-code.c
index d564f43..0d066b9 100644
--- a/gcc/lambda-code.c
+++ b/gcc/lambda-code.c
@@ -651,7 +651,19 @@ compute_nest_using_fourier_motzkin (int size,
 }
 
 /* Compute the loop bounds for the auxiliary space NEST.
-   Input system used is Ax <= b.  TRANS is the unimodular transformation.  */
+   Input system used is Ax <= b.  TRANS is the unimodular transformation.  
+   Given the original nest, this function will 
+   1. Convert the nest into matrix form, which consists of a matrix for the
+   coefficients, a matrix for the 
+   invariant coefficients, and a vector for the constants.  
+   2. Use the matrix form to calculate the lattice base for the nest (which is
+   a dense space) 
+   3. Compose the dense space transform with the user specified transform, to 
+   get a transform we can easily calculate transformed bounds for.
+   4. Multiply the composed transformation matrix times the matrix form of the
+   loop.
+   5. Transform the newly created matrix (from step 4) back into a loop nest
+   using fourier motzkin elimination to figure out the bounds.  */
 
 static lambda_loopnest
 lambda_compute_auxillary_space (lambda_loopnest nest,
@@ -786,9 +798,11 @@ lambda_compute_auxillary_space (lambda_loopnest nest,
 }
 
 /* Compute the loop bounds for the target space, using the bounds of
-   the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.  This is
-   done by matrix multiplication and then transformation of the new matrix
-   back into linear expression form.
+   the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.  
+   The target space loop bounds are computed by multiplying the triangular
+   matrix H by the auxillary nest, to get the new loop bounds.  The sign of
+   the loop steps (positive or negative) is then used to swap the bounds if
+   the loop counts downwards.
    Return the target loopnest.  */
 
 static lambda_loopnest
diff --git a/gcc/lambda.h b/gcc/lambda.h
index b736024..98fe6bd 100644
--- a/gcc/lambda.h
+++ b/gcc/lambda.h
@@ -29,11 +29,14 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
    and scalar multiplication.  In this vector space, an element is a list of
    integers.  */
 typedef int *lambda_vector;
+
 /* An integer matrix.  A matrix consists of m vectors of length n (IE
    all vectors are the same length).  */
 typedef lambda_vector *lambda_matrix;
 
-/* A transformation matrix.  */
+/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
+   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
+   represents the denominator for every element in the matrix.  */
 typedef struct
 {
   lambda_matrix matrix;
@@ -46,7 +49,15 @@ typedef struct
 #define LTM_COLSIZE(T) ((T)->colsize)
 #define LTM_DENOMINATOR(T) ((T)->denominator)
 
-/* A vector representing a statement in the body of a loop.  */
+/* A vector representing a statement in the body of a loop.
+   The COEFFICIENTS vector contains a coefficient for each induction variable
+   in the loop nest containing the statement.
+   The DENOMINATOR represents the denominator for each coefficient in the
+   COEFFICIENT vector.
+
+   This structure is used during code generation in order to rewrite the old
+   induction variable uses in a statement in terms of the newly created
+   induction variables.  */
 typedef struct
 {
   lambda_vector coefficients;
@@ -57,7 +68,18 @@ typedef struct
 #define LBV_SIZE(T) ((T)->size)
 #define LBV_DENOMINATOR(T) ((T)->denominator)
 
-/* Piecewise linear expression.  */
+/* Piecewise linear expression.  
+   This structure represents a linear expression with terms for the invariants
+   and induction variables of a loop. 
+   COEFFICIENTS is a vector of coefficients for the induction variables, one
+   per loop in the loop nest.
+   CONSTANT is the constant portion of the linear expression
+   INVARIANT_COEFFICIENTS is a vector of coefficients for the loop invariants,
+   one per invariant.
+   DENOMINATOR is the denominator for all of the coefficients and constants in
+   the expression.  
+   The linear expressions can be linked together using the NEXT field, in
+   order to represent MAX or MIN of a group of linear expressions.  */
 typedef struct lambda_linear_expression_s
 {
   lambda_vector coefficients;
@@ -77,7 +99,12 @@ lambda_linear_expression lambda_linear_expression_new (int, int);
 void print_lambda_linear_expression (FILE *, lambda_linear_expression, int,
 				     int, char);
 
-/* Loop structure.  */
+/* Loop structure.  Our loop structure consists of a constant representing the
+   STEP of the loop, a set of linear expressions representing the LOWER_BOUND
+   of the loop, a set of linear expressions representing the UPPER_BOUND of
+   the loop, and a set of linear expressions representing the LINEAR_OFFSET of
+   the loop.  The linear offset is a set of linear expressions that are
+   applied to *both* the lower bound, and the upper bound.  */
 typedef struct lambda_loop_s
 {
   lambda_linear_expression lower_bound;
@@ -91,7 +118,12 @@ typedef struct lambda_loop_s
 #define LL_LINEAR_OFFSET(T) ((T)->linear_offset)
 #define LL_STEP(T)   ((T)->step)
 
-/* Loop nest structure.  */
+/* Loop nest structure.  
+   The loop nest structure consists of a set of loop structures (defined
+   above) in LOOPS, along with an integer representing the DEPTH of the loop,
+   and an integer representing the number of INVARIANTS in the loop.  Both of
+   these integers are used to size the associated coefficient vectors in the
+   linear expression structures.  */
 typedef struct
 {
   lambda_loop *loops;
diff --git a/gcc/tree-loop-linear.c b/gcc/tree-loop-linear.c
index 07afe5d..fcb93ea 100644
--- a/gcc/tree-loop-linear.c
+++ b/gcc/tree-loop-linear.c
@@ -88,7 +88,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
    DEPENDENCE_STEPS = 3000
    NB_DEPS_NOT_CARRIED_BY_LOOP = 7
    ACCESS_STRIDES = 8010
-  */
+*/
 
 static void
 gather_interchange_stats (varray_type dependence_relations, 
@@ -112,21 +112,17 @@ gather_interchange_stats (varray_type dependence_relations,
 	(struct data_dependence_relation *) 
 	VARRAY_GENERIC_PTR (dependence_relations, i);
 
-      /* Compute the dependence strides.  */
+      /* If we don't know anything about this dependence, or the distance
+	 vector is NULL, or there is no dependence, then there is no reuse of
+	 data.  */
 
-      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
-	{
-	  (*dependence_steps) += 0;
-	  continue;
-	}
+      if (DDR_DIST_VECT (ddr) == NULL
+	  || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
+	  || DDR_ARE_DEPENDENT (ddr) == chrec_known)
+	continue;
+      
 
-      /* When we know that there is no dependence, we know that there
-	 is no reuse of the data.  */
-      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
-	{
-	  (*dependence_steps) += 0;
-	  continue;
-	}
+      
       dist = DDR_DIST_VECT (ddr)[loop_number];
       if (dist == 0)
 	(*nb_deps_not_carried_by_loop) += 1;
@@ -164,7 +160,8 @@ gather_interchange_stats (varray_type dependence_relations,
     }
 }
 
-/* Apply to TRANS any loop interchange that minimize inner loop steps.
+/* Attempt to apply interchange transformations to TRANS to maximize the
+   spatial and temporal locality of the loop.  
    Returns the new transform matrix.  The smaller the reuse vector
    distances in the inner loops, the fewer the cache misses.
    FIRST_LOOP is the loop->num of the first loop in the analyzed loop
@@ -238,7 +235,7 @@ void
 linear_transform_loops (struct loops *loops)
 {
   unsigned int i;
-
+  
   compute_immediate_uses (TDFA_USE_OPS | TDFA_USE_VOPS, NULL);
   for (i = 1; i < loops->num; i++)
     {