This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[lno] Linear loop transforms


This patch implements linear loop transforms for GCC.
These include every non-singular transform matrix you can apply to a loop nest, which means any combination of interchange, scaling, skewing, and reversal.


As tree-loop-linear.c says, "They are used to change the iteration order of loop nests in order to optimize data locality of traversals, or remove dependences that prevent parallelization/vectorization/etc."

If you wanted to, you can actually use it to rewrite any loop whose bounds you can express as a set of linear expressions, and step you can express as an integer. In other words, you don't have to give it a matrix transform if you don't want, you could simply tell it to change a loop to look like this or that, and it can do it.

Still todo include:
"TODO: Determine reuse vectors/matrix and use it to determine optimal transform matrix for locality purposes.
TODO: Add dependence matrix collection and approriate matrix calculations so we can determine if a given transformation matrix is legal for a loop.
TODO: Completion of partial transforms"


The files/functions are named lambda* because it is effectively a re-implementation of the lambda loop transformation toolkit (except for the parts still listed in the TODO). Anyone with a better name is welcome to let me know.

I bootstrapped it on powerpc-darwin and i686-pc-linux-gnu with the current identity matrix transform in place, which causes it to rewrite 15 loop nests to be exactly the same as they were before :).

I also have a set of interchange tests that i used with the #if'd out interchange transform, and it correctly rewrites the loop bounds/induction variables for those as well.

2004-03-04 Daniel Berlin <dberlin@dberlin.org>

* Makefile.in: Add lambda-mat.o, lambda-code.o, and tree-loop-linear.o.
* common.opt: Add -ftree-loop-linear.
* flags.h: Add flag_tree_loop_linear.
* opts.c: Handle tree-loop-linear option.
* timevar.def (TV_TREE_LINEAR_TRANSFORM): New.
* tree-flow.h (linear_transform_loops): New prototype.
* tree-optimize.c (pass_scev_linear_transform): New.
* tree-pass.h (pass_scev_linear_transform): Ditto.
* tree-scalar-evolution.c (scev_linear_transform): Ditto.
(gate_scev): Add check for flag_tree_loop_linear.
(gate_scev_linear_transform): New.
* lambda-code.c: New file.
* lambda-mat.c: New file.
* lambda-trans.c: New file.
* lambda.h: New file.
* tree-loop-linear.c: New file.
Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.903.2.158.2.16
diff -u -3 -p -w -B -b -r1.903.2.158.2.16 Makefile.in
--- Makefile.in 3 Mar 2004 18:42:13 -0000 1.903.2.158.2.16
+++ Makefile.in 4 Mar 2004 21:30:48 -0000
@@ -896,7 +896,8 @@ OBJS-common = \
targhooks.o timevar.o toplev.o tracer.o tree.o tree-dump.o unroll.o \
varasm.o varray.o version.o vmsdbgout.o xcoffout.o alloc-pool.o \
et-forest.o cfghooks.o bt-load.o pretty-print.o $(GGC) web.o \
- rtl-profile.o tree-profile.o
+ lambda-mat.o lambda-trans.o lambda-code.o rtl-profile.o tree-profile.o \
+ tree-loop-linear.o


OBJS-md = $(out_object_file)
OBJS-archive = $(EXTRA_OBJS) $(host_hook_obj) hashtable.o tree-inline.o \
@@ -1676,6 +1677,10 @@ tree-vectorizer.o: tree-vectorizer.c $(C
errors.h $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) diagnostic.h \
$(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) cfgloop.h tree-chrec.h tree-pass.h \
tree-vectorizer.h tree-data-ref.h tree-scalar-evolution.h tree-fold-const.h
+tree-loop-linear.o: tree-loop-linear.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+ errors.h $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) diagnostic.h \
+ $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) cfgloop.h tree-chrec.h tree-pass.h \
+ tree-data-ref.h tree-scalar-evolution.h tree-fold-const.h
c-call-graph.o : c-call-graph.c $(CONFIG_H) $(SYSTEM_H) $(C_TREE_H) \
$(C_COMMON_H) diagnostic.h hard-reg-set.h $(BASIC_BLOCK_H) $(TREE_FLOW_H) \
$(TM_H) coretypes.h
@@ -1841,7 +1846,13 @@ web.o : web.c $(CONFIG_H) $(SYSTEM_H) co
gcse.o : gcse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(REGS_H) \
hard-reg-set.h flags.h real.h insn-config.h $(GGC_H) $(RECOG_H) $(EXPR_H) \
$(BASIC_BLOCK_H) function.h output.h toplev.h $(TM_P_H) $(PARAMS_H) \
- except.h gt-gcse.h $(TREE_H) cselib.h
+ except.h gt-gcse.h $(TREE_H)
+lambda-mat.o : lambda-mat.c lambda.h $(GGC_H) $(SYSTEM_H) $(CONFIG_H)
+lambda-trans.o: lambda-trans.c lambda.h $(GGC_H) $(SYSTEM_H) $(CONFIG_H)
+lambda-code.o: lambda-code.c lambda.h $(GGC_H) $(SYSTEM_H) $(CONFIG_H) \
+ errors.h $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) diagnostic.h \
+ $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) cfgloop.h tree-chrec.h \
+ tree-data-ref.h tree-scalar-evolution.h tree-fold-const.h
resource.o : resource.c $(CONFIG_H) $(RTL_H) hard-reg-set.h $(SYSTEM_H) coretypes.h \
$(TM_H) $(BASIC_BLOCK_H) $(REGS_H) flags.h output.h resource.h function.h toplev.h \
$(INSN_ATTR_H) except.h $(PARAMS_H) $(TM_P_H)
Index: common.opt
===================================================================
RCS file: /cvs/gcc/gcc/gcc/common.opt,v
retrieving revision 1.14.2.13.2.10
diff -u -3 -p -w -B -b -r1.14.2.13.2.10 common.opt
--- common.opt 3 Mar 2004 18:42:14 -0000 1.14.2.13.2.10
+++ common.opt 4 Mar 2004 21:30:48 -0000
@@ -739,6 +739,10 @@ ftree-dominator-opts
Common
Enable dominator optimizations


+ftree-loop-linear
+Common
+Enable linear loop transforms on trees
+
 ftree-dse
 Common
 Enable dead store elimination
Index: flags.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/flags.h,v
retrieving revision 1.86.2.41.2.8
diff -u -3 -p -w -B -b -r1.86.2.41.2.8 flags.h
--- flags.h	3 Mar 2004 18:42:14 -0000	1.86.2.41.2.8
+++ flags.h	4 Mar 2004 21:30:48 -0000
@@ -718,6 +718,9 @@ extern int flag_scalar_evolutions;
 /* Enable the analysis of all the data dependences.  */
 extern int flag_all_data_deps;

+/* Enable linear loop transforms on trees. */
+extern int flag_tree_loop_linear;
+
 /* Enable the elimination of checks on trees.  */
 extern int flag_tree_elim_checks;

Index: lambda-code.c
===================================================================
RCS file: lambda-code.c
diff -N lambda-code.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ lambda-code.c 4 Mar 2004 21:30:48 -0000
@@ -0,0 +1,1623 @@
+/* Loop transformation code generation
+ Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+ Contributed by Daniel Berlin <dberlin@dberlin.org>
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2, or (at your option) any later
+ version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to the Free
+ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "errors.h"
+#include "ggc.h"
+#include "tree.h"
+#include "target.h"
+#include "rtl.h"
+#include "basic-block.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "tree-dump.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "tree-fold-const.h"
+#include "expr.h"
+#include "optabs.h"
+#include "tree-chrec.h"
+#include "tree-data-ref.h"
+#include "tree-pass.h"
+#include "tree-scalar-evolution.h"
+#include "varray.h"
+#include "lambda.h"
+
+
+/* Lattice stuff that is internal to the code generation algorithm. */
+
+typedef struct
+{
+ lambda_matrix base;
+ int dimension;
+ lambda_vector origin;
+ lambda_matrix origin_invariants;
+ int invariants;
+} * lambda_lattice;
+
+#define LATTICE_BASE(T) ((T)->base)
+#define LATTICE_DIMENSION(T) ((T)->dimension)
+#define LATTICE_ORIGIN(T) ((T)->origin)
+#define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
+#define LATTICE_INVARIANTS(T) ((T)->invariants)
+
+static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
+ int, int);
+static lambda_lattice lambda_lattice_new (int, int);
+static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
+
+static tree find_induction_var_from_exit_cond (struct loop *);
+
+
+/* Create a new lambda body vector. */
+
+lambda_body_vector
+lambda_body_vector_new (int size)
+{
+ lambda_body_vector ret;
+
+ ret = ggc_alloc (sizeof (*ret));
+ LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
+ LBV_SIZE (ret) = size;
+ LBV_DENOMINATOR (ret) = 1;
+ return ret;
+}
+
+/* Compute the new coefficients for the vector based on the
+ *inverse* of the transformation matrix. */
+
+lambda_body_vector
+lambda_body_vector_compute_new (lambda_trans_matrix transform,
+ lambda_body_vector vect)
+{
+ lambda_body_vector temp;
+ int depth;
+
+ /* Make sure the matrix is square. */
+ if (LTM_ROWSIZE (transform) != LTM_COLSIZE (transform))
+ abort ();
+
+ depth = LTM_ROWSIZE (transform);
+
+ temp = lambda_body_vector_new (depth);
+ LBV_DENOMINATOR (temp) = LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
+
+ lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
+ LTM_MATRIX (transform),
+ depth, LBV_COEFFICIENTS (temp));
+ LBV_SIZE (temp) = LBV_SIZE (vect);
+ return temp;
+}
+
+/* Print out a lambda body vector. */
+
+void
+print_lambda_body_vector (FILE *outfile, lambda_body_vector body)
+{
+ print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
+}
+
+/* Return TRUE if two linear expressions are equal. */
+
+static bool
+lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
+ int depth, int invariants)
+{
+ int i;
+
+ if (lle1 == NULL || lle2 == NULL)
+ return false;
+ if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
+ return false;
+ if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
+ return false;
+ for (i = 0; i < depth; i++)
+ if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
+ return false;
+ for (i = 0; i < invariants; i++)
+ if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] != LLE_INVARIANT_COEFFICIENTS (lle2)[i])
+ return false;
+ return true;
+}
+/* Create a new linear expression with dimension DIM, and total number
+ of invariants INVARIANTS. */
+
+lambda_linear_expression
+lambda_linear_expression_new (int dim, int invariants)
+{
+ lambda_linear_expression ret;
+
+ ret = ggc_alloc_cleared (sizeof (*ret));
+
+ LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
+ LLE_CONSTANT (ret) = 0;
+ LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
+ LLE_DENOMINATOR (ret) = 1;
+ LLE_NEXT (ret) = NULL;
+
+ return ret;
+}
+
+static void
+print_linear_expression (FILE *outfile, lambda_vector expr, int size,
+ char start)
+{
+ int i;
+ bool starting = true;
+ for (i = 0; i < size; i++)
+ {
+ if (expr[i] != 0)
+ {
+ if (starting)
+ {
+ if (expr[i] < 0)
+ fprintf (outfile, "-");
+ starting = false;
+ }
+ else if (expr[i] > 0)
+ fprintf (outfile, " + ");
+ else
+ fprintf (outfile, " - ");
+ if (expr[i] == 1 || expr [i] == -1)
+ fprintf (outfile, "%c", start + i);
+ else
+ fprintf (outfile, "%d%c", abs(expr[i]), start + i);
+ }
+ }
+}
+void
+print_lambda_linear_expression (FILE *outfile,
+ lambda_linear_expression expr,
+ int depth, int invariants, char start)
+{
+ fprintf (outfile, "\tLinear expression: ");
+ print_linear_expression (outfile, LLE_COEFFICIENTS (expr),
+ depth, start);
+ fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
+ fprintf (outfile, " invariants: ");
+ print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
+ invariants, 'A');
+ fprintf (outfile, " denominator: %d\n", LLE_DENOMINATOR (expr));
+}
+
+/* Print a loop. */
+
+void
+print_lambda_loop (FILE *outfile, lambda_loop loop, int depth,
+ int invariants, char start)
+{
+ int step;
+ lambda_linear_expression expr;
+
+ if (!loop)
+ abort ();
+
+ expr = LL_LINEAR_OFFSET (loop);
+ step = LL_STEP (loop);
+ fprintf (outfile, " step size = %d \n", step);
+
+ if (expr)
+ {
+ fprintf (outfile, " linear offset: \n");
+ print_lambda_linear_expression (outfile, expr, depth, invariants,
+ start);
+ }
+
+ fprintf (outfile, " lower bound: \n");
+ expr = LL_LOWER_BOUND (loop);
+ for (; expr != NULL; expr = LLE_NEXT (expr))
+ print_lambda_linear_expression (outfile, expr, depth, invariants,
+ start);
+ fprintf (outfile, " upper bound: \n");
+ expr = LL_UPPER_BOUND (loop);
+ for (; expr != NULL; expr = LLE_NEXT (expr))
+ print_lambda_linear_expression (outfile, expr, depth, invariants,
+ start);
+
+
+}
+
+/* Create a new loop nest structure. */
+
+lambda_loopnest
+lambda_loopnest_new (int depth, int invariants)
+{
+ lambda_loopnest ret;
+ ret = ggc_alloc (sizeof (*ret));
+
+ LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
+ LN_DEPTH (ret) = depth;
+ LN_INVARIANTS (ret) = invariants;
+
+ return ret;
+}
+
+
+/* Print a lambda loopnest structure. */
+
+void
+print_lambda_loopnest (FILE *outfile, lambda_loopnest nest, char start)
+{
+ int i;
+ for (i = 0; i < LN_DEPTH (nest); i++)
+ {
+ fprintf (outfile, "Loop %c\n", start + i);
+ print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
+ LN_INVARIANTS (nest), 'i');
+ fprintf (outfile, "\n");
+ }
+}
+
+/* Allocate a new lattice structure. */
+
+static lambda_lattice
+lambda_lattice_new (int depth, int invariants)
+{
+ lambda_lattice ret;
+ ret = ggc_alloc (sizeof (*ret));
+ LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
+ LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
+ LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
+ LATTICE_DIMENSION (ret) = depth;
+ LATTICE_INVARIANTS (ret) = invariants;
+ return ret;
+}
+
+/* Compute the lattice base for a loopnest. */
+
+static lambda_lattice
+lambda_lattice_compute_base (lambda_loopnest nest)
+{
+ lambda_lattice ret;
+ int depth, invariants;
+ lambda_matrix base;
+
+ int i, j, step;
+ lambda_loop loop;
+ lambda_linear_expression expression;
+
+ depth = LN_DEPTH (nest);
+ invariants = LN_INVARIANTS (nest);
+
+ ret = lambda_lattice_new (depth, invariants);
+ base = LATTICE_BASE (ret);
+ for (i = 0; i < depth; i++)
+ {
+ loop = LN_LOOPS(nest)[i];
+ if (!loop)
+ abort ();
+ step = LL_STEP (loop);
+ /* If we have a step of 1, then the base is one, and the
+ origin and invariant coefficients are 0. */
+ if (step == 1)
+ {
+ for (j = 0; j < depth; j++)
+ base[i][j] = 0;
+ base[i][i] = 1;
+ LATTICE_ORIGIN(ret)[i] = 0;
+ for (j = 0; j < invariants; j++)
+ LATTICE_ORIGIN_INVARIANTS(ret)[i][j] = 0;
+ }
+ else
+ {
+ /* Otherwise, we need the lower bound expression (which must
+ be an affine function) to determine the base. */
+ expression = LL_LOWER_BOUND (loop);
+ if (!expression
+ || LLE_NEXT (expression)
+ || LLE_DENOMINATOR (expression) != 1)
+ abort ();
+
+ for (j = 0; j < i; j ++)
+ base[i][j] = LLE_COEFFICIENTS (expression)[j]
+ * LL_STEP (LN_LOOPS(nest)[j]);
+ base[i][i] = step;
+ for (j = i + 1; j < depth; j++)
+ base[i][j] = 0;
+
+ /* Origin for this loop is the constant of the lower bound
+ expression. */
+ LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
+
+
+ /* Coefficient for the invariants are equal to the invariant
+ coefficients in the expression. */
+ for (j = 0; j < invariants; j++)
+ LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
+ LLE_INVARIANT_COEFFICIENTS (expression)[j];
+ }
+ }
+ return ret;
+}
+
+
+/* Compute the greatest common denominator of two numbers using
+ euclid's algorithm. */
+
+static int
+gcd (int a, int b)
+{
+
+ int x, y, z;
+
+ x = (a > 0) ? a : -1 * a;
+ y = (b > 0) ? b : -1 * b;
+
+ while (x>0)
+ {
+ z = y % x;
+ y = x;
+ x = z;
+ }
+
+ return (y);
+}
+
+/* Compute the greatest common denominator of a vector of numbers. */
+
+static int
+gcd_vector (lambda_vector vector, int size)
+{
+ int i;
+ int gcd1 = 0;
+
+
+ if (size > 0)
+ {
+ gcd1 = vector[0];
+ for (i = 1; i < size; i++)
+ gcd1 = gcd (gcd1, vector[i]);
+ }
+ return gcd1;
+}
+
+/* Compute the least common multiple of two numbers. */
+
+static int
+lcm (int a, int b)
+{
+ return (abs (a) * abs (b) / gcd (a, b) );
+}
+
+/* Compute the loop bounds for the auxillary space.
+ Input system is Ax <= b. TRANS is the unimodular transformation
+ matrix. Comments are transcribed from the equations in the paper. */
+
+static lambda_loopnest
+lambda_compute_auxillary_space (lambda_loopnest nest,
+ lambda_trans_matrix trans)
+{
+ lambda_matrix A, B, A1, B1, temp0;
+ lambda_vector a, a1, temp1;
+ lambda_matrix invertedtrans;
+ int determinant, depth, invariants, size, newsize;
+ int i, j, k;
+ lambda_loopnest auxillary_nest;
+ lambda_loop loop;
+ lambda_linear_expression expression;
+ lambda_lattice lattice;
+
+ int multiple, f1, f2;
+
+ depth = LN_DEPTH (nest);
+ invariants = LN_INVARIANTS (nest);
+ A = lambda_matrix_new (128, depth);
+ B = lambda_matrix_new (128, invariants);
+ a = lambda_vector_new (128);
+
+ A1 = lambda_matrix_new (128, depth);
+ B1 = lambda_matrix_new (128, invariants);
+ a1 = lambda_vector_new (128);
+
+ /* Store the bounds in the matrix A, vector a, and invariant matrix
+ B, so that we have Ax <= a + B. */
+ size = 0;
+ for (i = 0; i < depth; i++)
+ {
+ loop = LN_LOOPS(nest)[i];
+
+ /* First we do the lower bound. */
+ if (LL_STEP (loop) > 0)
+ expression = LL_LOWER_BOUND (loop);
+ else
+ expression = LL_UPPER_BOUND (loop);
+
+ for (; expression != NULL; expression = LLE_NEXT (expression))
+ {
+
+ /* Fill in the coefficient. */
+ for (j = 0; j < i; j++)
+ A[size][j] = LLE_COEFFICIENTS (expression)[j];
+
+ /* And the invariant coefficient. */
+ for (j = 0; j < invariants; j++)
+ B[size][j] = LLE_INVARIANT_COEFFICIENTS(expression)[j];
+
+ /* And the constant. */
+ a[size] = LLE_CONSTANT (expression);
+
+ /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b. */
+ A[size][i] = -1 * LLE_DENOMINATOR (expression);
+ a[size] *= -1;
+ for (j = 0; j < invariants; j++)
+ B[size][j] *= -1;
+
+ size++;
+ }
+
+ /* Then do the exact same thing for the upper bounds. */
+ if (LL_STEP (loop) > 0)
+ expression = LL_UPPER_BOUND (loop);
+ else
+ expression = LL_LOWER_BOUND (loop);
+
+ for (; expression != NULL; expression = LLE_NEXT (expression))
+ {
+
+ /* Fill in the coefficient. */
+ for (j = 0; j < i; j++)
+ A[size][j] = LLE_COEFFICIENTS (expression)[j];
+
+ /* And the invariant coefficient. */
+ for (j = 0; j < invariants; j++)
+ B[size][j] = LLE_INVARIANT_COEFFICIENTS(expression)[j];
+
+ /* And the constant. */
+ a[size] = LLE_CONSTANT (expression);
+
+ /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b. */
+ for (j = 0; j < i; j++)
+ A[size][j] *= -1;
+ A[size][i] = LLE_DENOMINATOR (expression);
+ size++;
+ }
+ }
+
+ /* Compute the lattice base x = base * y + origin, where y is the
+ base space. */
+ lattice = lambda_lattice_compute_base (nest);
+
+
+ /* Ax <= a + B becomes ALy <= a+B - A*origin. L is the lattice base */
+
+ /* A1 = AL */
+ lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
+
+ /* a1 = a - A * origin constant. */
+ lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice),
+ a1);
+ lambda_vector_add_mc (a, 1, a1, -1, a1, size);
+
+ /* B1 = B - A * origin invariant. */
+ lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
+ invariants);
+ lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
+
+
+ /* Compute the auxillary space.
+ Equations:
+ given A1 * y <= b1 + B1
+ Compute the auxillary space for z1=HUy
+ A1 * y <= a1 + B1.
+ Let z be the target space.
+ z = Tx = T(Ly+origin) = TLy + T*origin
+ z1 = z-T*origin = TLy = HUy. */
+
+ invertedtrans = lambda_matrix_new (depth, depth);
+
+ /* Compute the inverse of U. */
+ determinant = lambda_matrix_inverse (LTM_MATRIX (trans),
+ invertedtrans, depth);
+
+ /* A = A1 inv(U). */
+ lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
+
+ /* Perform Fourier-Motzkin on Ax <= a + B. */
+
+ temp0 = B1;
+ B1 = B;
+ B = temp0;
+
+ temp1 = a1;
+ a1 = a;
+ a = temp1;
+
+ auxillary_nest = lambda_loopnest_new (depth, invariants);
+
+ for (i = depth - 1; i >= 0; i--)
+ {
+ loop = lambda_loop_new ();
+ LN_LOOPS(auxillary_nest)[i] = loop;
+ LL_STEP (loop) = 1;
+
+ for (j = 0; j < size; j++)
+ {
+ if (A[j][i] < 0)
+ {
+ /* Lower bound. */
+ expression = lambda_linear_expression_new (depth, invariants);
+
+ for (k = 0; k < i; k++)
+ LLE_COEFFICIENTS(expression)[k] = A[j][k];
+ for (k = 0; k < invariants; k++)
+ LLE_INVARIANT_COEFFICIENTS(expression)[k] = -1 * B[j][k];
+ LLE_DENOMINATOR (expression) = -1 * A[j][i];
+ LLE_CONSTANT (expression) = -1 * a[j];
+ /* Ignore if identical to the existing lower bound. */
+ if (!lle_equal (LL_LOWER_BOUND (loop),
+ expression, depth, invariants))
+ {
+ LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
+ LL_LOWER_BOUND (loop) = expression;
+ }
+
+
+ }
+ else if (A[j][i] > 0)
+ {
+ /* Upper bound. */
+ expression = lambda_linear_expression_new (depth, invariants);
+ for (k = 0; k < i; k++)
+ LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
+ LLE_CONSTANT (expression) = a[j];
+
+ for (k = 0; k < invariants; k++)
+ LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
+
+ LLE_DENOMINATOR (expression) = A[j][i];
+ /* Ignore if identical to the existing upper bound. */
+ if (!lle_equal (LL_UPPER_BOUND (loop),
+ expression, depth, invariants))
+ {
+ LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
+ LL_UPPER_BOUND (loop) = expression;
+ }
+
+ }
+ }
+ /* creates a new system by deleting the i'th variable. */
+ newsize = 0;
+ for (j = 0; j < size; j++)
+ {
+ if (A[j][i] == 0)
+ {
+ lambda_vector_copy (A[j], A1[newsize], depth);
+ lambda_vector_copy (B[j], B1[newsize], invariants);
+ a1[newsize] = a[j];
+ newsize++;
+ }
+ else if (A[j][i] > 0)
+ {
+ for (k = 0; k < size; k++)
+ {
+ if (A[k][i] < 0)
+ {
+ multiple = lcm (A[j][i], A[k][i]);
+ f1 = multiple / A[j][i];
+ f2 = -1 * multiple / A[k][i];
+
+ lambda_vector_add_mc (A[j], f1, A[k], f2,
+ A1[newsize], depth);
+ lambda_vector_add_mc (B[j], f1, B[k], f2,
+ B1[newsize], invariants);
+ a1[newsize] = f1 * a[j] + f2 * a[k];
+ newsize++;
+ }
+ }
+ }
+ }
+
+ temp0 = A;
+ A = A1;
+ A1 = temp0;
+
+ temp0 = B;
+ B = B1;
+ B1 = temp0;
+
+ temp1 = a;
+ a = a1;
+ a1 = temp1;
+
+ size = newsize;
+ }
+
+ return auxillary_nest;
+}
+
+
+/* Compute the loop bounds for the target space, using the bounds of
+ the auxillary nest.
+ Output a new set of linear bounds and linear offsets. */
+
+static lambda_loopnest
+lambda_compute_target_space (lambda_loopnest auxillary_nest,
+ lambda_trans_matrix H,
+ lambda_vector stepsigns)
+{
+ lambda_matrix inverse, H1;
+ int determinant, i, j;
+ int gcd1, gcd2;
+ int factor;
+
+
+ lambda_loopnest target_nest;
+ int depth, invariants;
+ lambda_matrix target;
+
+ lambda_loop auxillary_loop, target_loop;
+ lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
+
+ depth = LN_DEPTH (auxillary_nest);
+ invariants = LN_INVARIANTS (auxillary_nest);
+
+ inverse = lambda_matrix_new (depth, depth);
+ determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
+
+ /* H1 is H excluding its diagonal. */
+ H1 = lambda_matrix_new (depth, depth);
+ lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
+
+ for (i = 0; i < depth; i++)
+ H1[i][i] = 0;
+
+ /* Computes the linear offsets of the loop bounds. */
+ target = lambda_matrix_new (depth, depth);
+ lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
+
+ target_nest = lambda_loopnest_new (depth, invariants);
+
+ for (i = 0; i < depth; i++)
+ {
+
+ /* Get a new loop structure. */
+ target_loop = lambda_loop_new ();
+ LN_LOOPS(target_nest)[i] = target_loop;
+
+ /* Computes the gcd of the coefficients of the linear part. */
+ gcd1 = gcd_vector (target[i], i);
+
+ /* Include the denominator in the GCD */
+ gcd1 = gcd (gcd1, determinant);
+
+ /* Now divide through by the gcd */
+ for (j = 0; j < i; j++)
+ target[i][j] = target[i][j] / gcd1;
+
+ expression = lambda_linear_expression_new (depth, invariants);
+ lambda_vector_copy (target[i], LLE_COEFFICIENTS(expression), depth);
+ LLE_DENOMINATOR (expression) = determinant / gcd1;
+ LLE_CONSTANT (expression) = 0;
+ lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression), invariants);
+ LL_LINEAR_OFFSET (target_loop) = expression;
+ }
+
+ /* For each loop, compute the new bounds. */
+ for (i = 0; i < depth; i++)
+ {
+ auxillary_loop = LN_LOOPS(auxillary_nest)[i];
+ target_loop = LN_LOOPS(target_nest)[i];
+ LL_STEP (target_loop) = LTM_MATRIX(H)[i][i];
+ factor = LTM_MATRIX(H)[i][i];
+
+ /* First we do the lower bound. */
+ auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
+
+ for (; auxillary_expr != NULL; auxillary_expr = LLE_NEXT (auxillary_expr))
+ {
+ target_expr = lambda_linear_expression_new (depth, invariants);
+ lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
+ depth, inverse, depth,
+ LLE_COEFFICIENTS (target_expr));
+ lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
+ LLE_COEFFICIENTS (target_expr), depth,
+ factor);
+
+ LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
+ lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
+ LLE_INVARIANT_COEFFICIENTS (target_expr),
+ invariants);
+ lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
+ LLE_INVARIANT_COEFFICIENTS (target_expr),
+ invariants, factor);
+ LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
+
+ if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
+ {
+ LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
+ * determinant;
+ lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
+ LLE_INVARIANT_COEFFICIENTS (target_expr),
+ invariants,
+ determinant);
+ LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (target_expr)
+ * determinant;
+ }
+ /* Find the gcd and divide by it here, rather than doing it
+ at the tree level. */
+ gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
+ gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
+ invariants);
+ gcd1 = gcd (gcd1, gcd2);
+ gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
+ gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
+ for (j = 0; j < depth; j++)
+ LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
+ for (j = 0; j < invariants; j++)
+ LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
+ LLE_CONSTANT (target_expr) /= gcd1;
+ LLE_DENOMINATOR (target_expr) /= gcd1;
+ /* Ignore if identical to existing bound. */
+ if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
+ invariants))
+ {
+ LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
+ LL_LOWER_BOUND (target_loop) = target_expr;
+ }
+ }
+ /* Now do the upper bound. */
+ auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
+
+ for (; auxillary_expr != NULL; auxillary_expr = LLE_NEXT (auxillary_expr))
+ {
+ target_expr = lambda_linear_expression_new (depth, invariants);
+ lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
+ depth, inverse, depth,
+ LLE_COEFFICIENTS (target_expr));
+ lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
+ LLE_COEFFICIENTS (target_expr), depth,
+ factor);
+ LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
+ lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
+ LLE_INVARIANT_COEFFICIENTS (target_expr),
+ invariants);
+ lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
+ LLE_INVARIANT_COEFFICIENTS (target_expr),
+ invariants, factor);
+ LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
+
+ if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
+ {
+ LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
+ * determinant;
+ lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
+ LLE_INVARIANT_COEFFICIENTS (target_expr),
+ invariants,
+ determinant);
+ LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (target_expr)
+ * determinant;
+ }
+ /* Find the gcd and divide by it here, instead of at the
+ tree level. */
+ gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
+ gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
+ invariants);
+ gcd1 = gcd (gcd1, gcd2);
+ gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
+ gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
+ for (j = 0; j < depth; j++)
+ LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
+ for (j = 0; j < invariants; j++)
+ LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
+ LLE_CONSTANT (target_expr) /= gcd1;
+ LLE_DENOMINATOR (target_expr) /= gcd1;
+ /* Ignore if equal to existing bound. */
+ if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
+ invariants))
+ {
+ LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
+ LL_UPPER_BOUND (target_loop) = target_expr;
+ }
+ }
+ }
+ for (i = 0; i < depth; i++)
+ {
+ target_loop = LN_LOOPS (target_nest)[i];
+ /* If necessary, exchange the upper and lower bounds and negate
+ the step size. */
+ if (stepsigns[i] < 0)
+ {
+ LL_STEP (target_loop) *= -1;
+ tmp_expr = LL_LOWER_BOUND (target_loop);
+ LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
+ LL_UPPER_BOUND (target_loop) = tmp_expr;
+ }
+ }
+ return target_nest;
+}
+
+/* Compute the step signs. */
+
+static lambda_vector
+lambda_compute_step_signs (lambda_trans_matrix trans,
+ lambda_vector stepsigns)
+{
+ lambda_matrix matrix, H;
+ int size;
+ lambda_vector newsteps;
+ int i, j, factor, minimum_column;
+ int temp;
+
+ matrix = LTM_MATRIX (trans);
+ size = LTM_ROWSIZE (trans);
+ H = lambda_matrix_new (size, size);
+
+ newsteps = lambda_vector_new (size);
+ lambda_vector_copy (stepsigns, newsteps, size);
+
+ lambda_matrix_copy (matrix, H, size, size);
+
+ for (j = 0; j < size; j++)
+ {
+ lambda_vector row;
+ row = H[j];
+ for (i = j; i < size; i++)
+ if (row[i] < 0)
+ lambda_matrix_col_negate (H, size, i);
+ while (lambda_vector_first_nz (row, size, j + 1) < size)
+ {
+ minimum_column = lambda_vector_min_nz (row, size, j);
+ lambda_matrix_col_exchange (H, size, j, minimum_column);
+
+ temp = newsteps[j];
+ newsteps[j] = newsteps[minimum_column];
+ newsteps[minimum_column] = temp;
+
+ for (i = j + 1; i < size; i++)
+ {
+ factor = row[i] / row[j];
+ lambda_matrix_col_add (H, size, j, i, -1 * factor);
+ }
+ }
+ }
+ return newsteps;
+}
+
+/* Transform NEST according to TRANS, and return the new loop nest. */
+
+lambda_loopnest
+lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
+{
+ lambda_loopnest auxillary_nest, target_nest;
+
+ int depth, invariants;
+ int i, j;
+ lambda_lattice lattice;
+ lambda_trans_matrix trans1, H, U;
+ lambda_loop loop;
+ lambda_linear_expression expression;
+ lambda_vector origin;
+ lambda_matrix origin_invariants;
+ lambda_vector stepsigns;
+ int f;
+
+ depth = LN_DEPTH (nest);
+ invariants = LN_INVARIANTS (nest);
+
+ stepsigns = lambda_vector_new (depth);
+ for (i = 0; i < depth; i++)
+ {
+ if (LL_STEP (LN_LOOPS(nest)[i]) > 0)
+ stepsigns[i] = 1;
+ else
+ stepsigns[i] = -1;
+ }
+ lattice = lambda_lattice_compute_base (nest);
+ trans1 = lambda_trans_matrix_new (depth, depth);
+ lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
+ LTM_MATRIX (trans1),
+ depth, depth, depth);
+ H = lambda_trans_matrix_new (depth, depth);
+ U = lambda_trans_matrix_new (depth, depth);
+ lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H), LTM_MATRIX (U));
+
+ auxillary_nest = lambda_compute_auxillary_space (nest, U);
+
+ stepsigns = lambda_compute_step_signs (trans1, stepsigns);
+
+ target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
+
+ origin = lambda_vector_new (depth);
+ origin_invariants = lambda_matrix_new (depth, invariants);
+ lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
+ LATTICE_ORIGIN (lattice), origin);
+ lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
+ origin_invariants, depth, depth, invariants);
+ for (i = 0; i < depth; i++)
+ {
+ loop = LN_LOOPS (target_nest)[i];
+ expression = LL_LINEAR_OFFSET (loop);
+ if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
+ f = 1;
+ else
+ f = LLE_DENOMINATOR (expression);
+ LLE_CONSTANT (expression) += f * origin[i];
+
+ for (j = 0; j < invariants; j++)
+ LLE_INVARIANT_COEFFICIENTS(expression)[j] += f * origin_invariants[i][j];
+ }
+
+
+ return target_nest;
+
+}
+
+
+/* Convert a gcc tree expression to a linear expression.
+ This is a trivial implementation. */
+
+static lambda_linear_expression
+gcc_tree_to_linear_expression (int depth, tree expr,
+ varray_type outerinductionvars,
+ bool minusone)
+{
+ lambda_linear_expression lle = NULL;
+ switch (TREE_CODE (expr))
+ {
+ case INTEGER_CST:
+ {
+ lle = lambda_linear_expression_new (depth, 0);
+ LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
+ if (minusone)
+ LLE_CONSTANT (lle) -= 1;
+
+ LLE_DENOMINATOR (lle) = 1;
+ }
+ break;
+ case SSA_NAME:
+ {
+ size_t i;
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (outerinductionvars); i++)
+ if (VARRAY_TREE (outerinductionvars, i) != NULL)
+ {
+ tree iv = VARRAY_TREE (outerinductionvars, i);
+ if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
+ {
+ lle = lambda_linear_expression_new (depth, 0);
+ LLE_COEFFICIENTS (lle)[i] = 1;
+ if (minusone)
+ LLE_CONSTANT (lle) = -1;
+
+ LLE_DENOMINATOR (lle) = 1;
+ }
+ }
+ }
+ break;
+ default:
+ return NULL;
+ }
+
+ return lle;
+}
+
+/* Generate a lambda loop from a gcc loop. */
+
+static lambda_loop
+gcc_loop_to_lambda_loop (struct loop *loop, int depth,
+ int invariants ATTRIBUTE_UNUSED,
+ tree *ourinductionvar,
+ varray_type outerinductionvars)
+{
+ tree phi;
+ tree exit_cond;
+ tree access_fn, inductionvar;
+ tree step;
+ lambda_loop lloop = NULL;
+ lambda_linear_expression lbound, ubound;
+ tree test;
+ int stepint;
+ use_optype uses;
+
+
+ /* Find out induction var and set the pointer so that the caller can
+ append it to the outerinductionvars array later. */
+
+ inductionvar = find_induction_var_from_exit_cond (loop);
+ *ourinductionvar = inductionvar;
+
+ exit_cond = get_loop_exit_condition (loop);
+
+ if (inductionvar == NULL || exit_cond == NULL)
+ {
+ if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
+ fprintf (tree_dump_file, "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
+ return NULL;
+ }
+
+
+
+ test = TREE_OPERAND (exit_cond, 0);
+ if (TREE_CODE (test) != LE_EXPR && TREE_CODE (test) != LT_EXPR)
+ {
+
+ if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
+ {
+ fprintf (tree_dump_file, "Unable to convert loop: Loop exit test uses unhandled test condition:");
+ print_generic_stmt (tree_dump_file, test, 0);
+ fprintf (tree_dump_file, "\n");
+ }
+ return NULL;
+ }
+
+
+ /* XXX - MUST BE BETTER WAY TO GET PHI NODE FOR INDUCTION
+ VARIABLES. */
+ if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
+ {
+
+ if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
+ fprintf (tree_dump_file, "Unable to convert loop: Cannot find PHI node for induction variable\n");
+
+ return NULL;
+ }
+
+ phi = SSA_NAME_DEF_STMT (inductionvar);
+ if (TREE_CODE (phi) != PHI_NODE)
+ {
+ uses = STMT_USE_OPS (phi);
+
+ if (!uses)
+ {
+
+ if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
+ fprintf (tree_dump_file, "Unable to convert loop: Cannot find PHI node for induction variable\n");
+
+ return NULL;
+ }
+
+ phi = USE_OP (uses, 0);
+ phi = SSA_NAME_DEF_STMT (phi);
+ if (TREE_CODE (phi) != PHI_NODE)
+ {
+
+ if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
+ fprintf (tree_dump_file, "Unable to convert loop: Cannot find PHI node for induction variable\n");
+ return NULL;
+ }
+
+ }
+
+ access_fn = instantiate_parameters
+ (loop_num (loop),
+ analyze_scalar_evolution (loop_num (loop), PHI_RESULT (phi)));
+ if (!access_fn)
+ {
+ if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
+ fprintf (tree_dump_file, "Unable to convert loop: Access function for induction variable phi is NULL\n");
+
+ return NULL;
+ }
+
+ step = evolution_part_in_loop_num (access_fn, loop_num (loop));
+ if (!step)
+ {
+ if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
+ fprintf (tree_dump_file, "Unable to convert loop: Cannot determine step of loop.\n");
+
+ return NULL;
+ }
+ if (TREE_CODE (step) != INTEGER_CST)
+ {
+
+ if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
+ fprintf (tree_dump_file, "Unable to convert loop: Step of loop is not integer.\n");
+ return NULL;
+ }
+
+ stepint = TREE_INT_CST_LOW (step);
+
+ /* Only want phis for induction vars, which will have two
+ arguments. */
+ if (PHI_NUM_ARGS (phi) != 2)
+ {
+ if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
+ fprintf (tree_dump_file, "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
+ return NULL;
+ }
+
+ /* Another induction variable check. One argument's source should be
+ in the loop, one outside the loop. */
+ if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
+ && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
+ {
+
+ if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
+ fprintf (tree_dump_file, "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
+
+ return NULL;
+ }
+
+
+
+ if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
+
+ lbound = gcc_tree_to_linear_expression (depth, PHI_ARG_DEF (phi, 1),
+ outerinductionvars, false);
+ else
+ lbound = gcc_tree_to_linear_expression (depth, PHI_ARG_DEF (phi, 0),
+ outerinductionvars, false);
+ if (!lbound)
+ {
+
+ if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
+ fprintf (tree_dump_file, "Unable to convert loop: Cannot convert lower bound to linear expression\n");
+
+ return NULL;
+ }
+
+ ubound = gcc_tree_to_linear_expression (depth,
+ TREE_OPERAND (test, 1),
+ outerinductionvars,
+ TREE_CODE (exit_cond) == LT_EXPR);
+ if (!ubound)
+ {
+
+ if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
+ fprintf (tree_dump_file, "Unable to convert loop: Cannot convert upper bound to linear expression\n");
+ return NULL;
+ }
+
+
+ lloop = lambda_loop_new ();
+ LL_STEP (lloop) = stepint;
+ LL_LOWER_BOUND (lloop) = lbound;
+ LL_UPPER_BOUND (lloop) = ubound;
+ return lloop;
+}
+
+/* Given an exit condition, find the induction variable it is testing
+ against. */
+
+static tree
+find_induction_var_from_exit_cond (struct loop *loop)
+{
+ tree expr = get_loop_exit_condition (loop);
+ tree test;
+ if (expr == NULL_TREE)
+ return NULL_TREE;
+ if (TREE_CODE (expr) != COND_EXPR)
+ return NULL_TREE;
+ test = TREE_OPERAND (expr, 0);
+ if (TREE_CODE_CLASS (TREE_CODE (test)) != '<')
+ return NULL_TREE;
+ if (TREE_CODE (TREE_OPERAND (test, 0)) != SSA_NAME)
+ return NULL_TREE;
+ return TREE_OPERAND (test, 0);
+}
+
+/* Generate a lambda loopnest from a gcc loopnest. */
+
+lambda_loopnest
+gcc_loopnest_to_lambda_loopnest (struct loop *loop_nest,
+ varray_type *inductionvars)
+{
+ lambda_loopnest ret;
+ struct loop *temp;
+ int invariants = 0;
+ int depth = 0;
+ size_t i;
+ varray_type loops;
+ lambda_loop newloop;
+ tree inductionvar = NULL;
+
+
+ temp = loop_nest;
+ while (temp)
+ {
+ depth++;
+ temp = temp->inner;
+ }
+ VARRAY_GENERIC_PTR_INIT (loops, 1, "Loop nest");
+ VARRAY_GENERIC_PTR_INIT (*inductionvars, 1, "induction vars");
+ temp = loop_nest;
+ while (temp)
+ {
+ newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
+ &inductionvar, *inductionvars);
+ if (!newloop)
+ return NULL;
+ VARRAY_PUSH_GENERIC_PTR (*inductionvars, inductionvar);
+ VARRAY_PUSH_GENERIC_PTR (loops, newloop);
+ temp = temp->inner;
+ }
+ ret = lambda_loopnest_new (depth, invariants);
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (loops); i++)
+ LN_LOOPS(ret)[i] = VARRAY_GENERIC_PTR (loops, i);
+
+
+ return ret;
+
+}
+
+static tree build_int_cst (tree type, unsigned HOST_WIDE_INT val);
+
+/* Builds integer constant of type TYPE and value VAL.
+ Borrowed from tree-ssa-loop-ivopts.c
+ XXX: Move this into tree.c and remove from both files. */
+
+static tree
+build_int_cst (tree type, unsigned HOST_WIDE_INT val)
+{
+ unsigned bits = TYPE_PRECISION (type);
+ bool signed_p = !TREE_UNSIGNED (type);
+ bool negative = ((val >> (bits - 1)) & 1) != 0;
+ tree ival;
+
+ if (signed_p && negative)
+ {
+ val = val | (~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
+ ival = build_int_2 (val, -1);
+ }
+ else
+ {
+ val = val & ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
+ ival = build_int_2 (val, 0);
+ }
+
+ return convert (type, ival);
+}
+
+/* Convert a lambda body vector to a gcc tree. */
+
+static tree
+lbv_to_gcc_expression (lambda_body_vector lbv,
+ varray_type induction_vars,
+ tree *stmts_to_insert)
+{
+ tree stmts, stmt, resvar, name;
+ size_t i;
+ tree_stmt_iterator tsi;
+
+ /* Create a statement list and a linear expression temporary. */
+ stmts = alloc_stmt_list ();
+ resvar = create_tmp_var (integer_type_node, "lletmp");
+ add_referenced_tmp_var (resvar);
+
+ /* Start at 0. */
+ stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
+ name = make_ssa_name (resvar, stmt);
+ TREE_OPERAND (stmt, 0) = name;
+ tsi = tsi_last (stmts);
+ tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
+
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (induction_vars); i++)
+ {
+ if (LBV_COEFFICIENTS (lbv)[i] != 0)
+ {
+ tree newname;
+
+ /* newname = coefficient * induction_variable */
+ stmt = build (MODIFY_EXPR, void_type_node, resvar,
+ fold (build (MULT_EXPR, integer_type_node,
+ VARRAY_TREE (induction_vars, i),
+ build_int_cst (integer_type_node,
+ LBV_COEFFICIENTS (lbv)[i]))));
+ newname = make_ssa_name (resvar, stmt);
+ TREE_OPERAND (stmt, 0) = newname;
+ tsi = tsi_last (stmts);
+ tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
+ /* name = name + newname */
+ stmt = build (MODIFY_EXPR, void_type_node, resvar,
+ build (PLUS_EXPR, integer_type_node,
+ name, newname));
+ name = make_ssa_name (resvar, stmt);
+ TREE_OPERAND (stmt, 0) = name;
+ tsi = tsi_last (stmts);
+ tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
+ }
+ }
+
+ /* Handle any denominator that occurs. */
+ if (LBV_DENOMINATOR (lbv) != 1)
+ {
+#if 0
+ if (wrap == MAX_EXPR)
+#endif
+ stmt = build (MODIFY_EXPR, void_type_node, resvar,
+ build (CEIL_DIV_EXPR, integer_type_node,
+ name, build_int_cst (integer_type_node,
+ LBV_DENOMINATOR (lbv))));
+#if 0
+ else if (wrap == MIN_EXPR)
+ stmt = build (MODIFY_EXPR, void_type_node, resvar,
+ build (FLOOR_DIV_EXPR, integer_type_node,
+ name, build_int_cst (integer_type_node,
+ LBV_DENOMINATOR (lbv))));
+ else
+ abort ();
+#endif
+ name = make_ssa_name (resvar, stmt);
+ TREE_OPERAND (stmt, 0) = name;
+ tsi = tsi_last (stmts);
+ tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
+ }
+ *stmts_to_insert = stmts;
+ return name;
+}
+
+
+/* Convert a linear expression from coefficient and constant form to a
+ gcc tree. This will likely generate trees that are badly in need
+ of constant and copy propagation. */
+
+static tree
+lle_to_gcc_expression (lambda_linear_expression lle,
+ varray_type induction_vars,
+ enum tree_code wrap,
+ tree *stmts_to_insert)
+{
+ tree stmts, stmt, resvar, name;
+ size_t i;
+ tree_stmt_iterator tsi;
+ varray_type results;
+
+ name = NULL_TREE;
+ /* Create a statement list and a linear expression temporary. */
+ stmts = alloc_stmt_list ();
+ resvar = create_tmp_var (integer_type_node, "lletmp");
+ add_referenced_tmp_var (resvar);
+ VARRAY_TREE_INIT (results, 1, "Results");
+
+ for (; lle != NULL; lle = LLE_NEXT (lle))
+ {
+ /* Start at 0. */
+ stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
+ name = make_ssa_name (resvar, stmt);
+ TREE_OPERAND (stmt, 0) = name;
+ tsi = tsi_last (stmts);
+ tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
+
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (induction_vars); i++)
+ {
+ if (LLE_COEFFICIENTS (lle)[i] != 0)
+ {
+ tree newname;
+ tree mult;
+ tree coeff;
+ coeff = build_int_cst (integer_type_node,
+ LLE_COEFFICIENTS (lle)[i]);
+ mult = fold (build (MULT_EXPR, integer_type_node,
+ VARRAY_TREE (induction_vars, i), coeff));
+ /* newname = coefficient * induction_variable */
+ stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
+ newname = make_ssa_name (resvar, stmt);
+ TREE_OPERAND (stmt, 0) = newname;
+ tsi = tsi_last (stmts);
+ tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
+ /* name = name + newname */
+ stmt = build (MODIFY_EXPR, void_type_node, resvar,
+ build (PLUS_EXPR, integer_type_node,
+ name, newname));
+ name = make_ssa_name (resvar, stmt);
+ TREE_OPERAND (stmt, 0) = name;
+ tsi = tsi_last (stmts);
+ tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
+ }
+ }
+
+ /* Now handle the constant.
+ name = name + constant. */
+ if (LLE_CONSTANT (lle) != 0)
+ {
+ stmt = build (MODIFY_EXPR, void_type_node, resvar,
+ build (PLUS_EXPR, integer_type_node,
+ name, build_int_cst (integer_type_node,
+ LLE_CONSTANT (lle))));
+ name = make_ssa_name (resvar, stmt);
+ TREE_OPERAND (stmt, 0) = name;
+ tsi = tsi_last (stmts);
+ tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
+ }
+
+ /* Handle any denominator that occurs. */
+ if (LLE_DENOMINATOR (lle) != 1)
+ {
+ if (wrap == MAX_EXPR)
+ stmt = build (MODIFY_EXPR, void_type_node, resvar,
+ build (CEIL_DIV_EXPR, integer_type_node,
+ name, build_int_cst (integer_type_node,
+ LLE_DENOMINATOR (lle))));
+ else if (wrap == MIN_EXPR)
+ stmt = build (MODIFY_EXPR, void_type_node, resvar,
+ build (FLOOR_DIV_EXPR, integer_type_node,
+ name, build_int_cst (integer_type_node,
+ LLE_DENOMINATOR (lle))));
+ else
+ abort ();
+ name = make_ssa_name (resvar, stmt);
+ TREE_OPERAND (stmt, 0) = name;
+ tsi = tsi_last (stmts);
+ tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
+ }
+ VARRAY_PUSH_TREE (results, name);
+ }
+
+ /* Again, out of lazyness, we don't handle this case yet. It's not
+ hard, it just hasn't occurred. */
+ if (VARRAY_ACTIVE_SIZE (results) > 2)
+ abort ();
+
+ /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
+ if (VARRAY_ACTIVE_SIZE (results) > 1)
+ {
+ tree op1 = VARRAY_TREE (results, 0);
+ tree op2 = VARRAY_TREE (results, 1);
+ stmt = build (MODIFY_EXPR, void_type_node, resvar,
+ build (wrap, integer_type_node, op1, op2));
+ name = make_ssa_name (resvar, stmt);
+ TREE_OPERAND (stmt, 0) = name;
+ tsi = tsi_last (stmts);
+ tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
+ }
+
+ *stmts_to_insert = stmts;
+ return name;
+}
+
+/* Transform a lambda loopnest back into code. This changes the
+ loops, their induction variables, and their bodies, so that they
+ match the transformed loopnest. */
+void
+lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
+ varray_type old_ivs,
+ lambda_loopnest new_loopnest,
+ lambda_trans_matrix transform)
+{
+
+ struct loop *temp;
+ size_t i = 0;
+ size_t depth = 0;
+ varray_type new_ivs;
+ block_stmt_iterator bsi;
+ basic_block *bbs;
+
+ if (tree_dump_file)
+ {
+ transform = lambda_trans_matrix_inverse (transform);
+ fprintf (tree_dump_file, "Inverse of transformation matrix:\n");
+ print_lambda_trans_matrix (tree_dump_file, transform);
+ }
+ temp = old_loopnest;
+ VARRAY_GENERIC_PTR_INIT (new_ivs, 1, "New induction variables");
+ while (temp)
+ {
+ temp = temp->inner;
+ depth++;
+ }
+ temp = old_loopnest;
+
+ while (temp)
+ {
+ lambda_loop newloop;
+ basic_block bb;
+ tree ivvar, ivvarinced, exitcond, stmts;
+ tree newupperbound, newlowerbound;
+ lambda_linear_expression offset;
+ /* First, build the new induction variable temporary */
+
+ ivvar = create_tmp_var (integer_type_node, "lnivtmp");
+ add_referenced_tmp_var (ivvar);
+
+ VARRAY_PUSH_GENERIC_PTR (new_ivs, ivvar);
+
+ newloop = LN_LOOPS (new_loopnest)[i];
+
+ /* Linear offset is a bit tricky to handle. */
+ offset = LL_LINEAR_OFFSET (newloop);
+
+ if (LLE_DENOMINATOR (offset) != 1
+ || !lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth))
+ abort ();
+
+ /* Now build the new lower bounds, and insert the statements
+ necessary to generate it on the loop preheader. */
+ newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop), new_ivs,
+ MAX_EXPR, &stmts);
+ bsi_insert_on_edge_immediate (loop_preheader_edge (temp), stmts);
+
+ /* Build the new upper bound and insert its statements in the
+ basic block of the exit condition */
+ newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop), new_ivs,
+ MIN_EXPR, &stmts);
+ /* XXX Is this right, or do we want before the first statement? */
+ exitcond = get_loop_exit_condition (temp);
+ bb = bb_for_stmt (exitcond);
+ bsi = bsi_start (bb);
+ bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
+
+ /* Create the new iv, and insert it's increment on the latch
+ block. */
+
+ bb = temp->latch->pred->src;
+ bsi = bsi_last (bb);
+ create_iv (newlowerbound,
+ build_int_cst (integer_type_node, LL_STEP (newloop)),
+ ivvar, temp, &bsi, false, &ivvar, &ivvarinced);
+
+ /* Replace the exit condition with the new upper bound
+ comparison. */
+ COND_EXPR_COND (exitcond) = build (LE_EXPR,
+ boolean_type_node,
+ ivvarinced, newupperbound);
+ modify_stmt (exitcond);
+ VARRAY_GENERIC_PTR (new_ivs, i) = ivvar;
+
+ i++;
+ temp = temp->inner;
+ }
+
+ /* Go through the loop and make iv replacements. */
+ bbs = get_loop_body (old_loopnest);
+ for (i = 0; i < old_loopnest->num_nodes; i++)
+ for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
+ {
+ tree stmt = bsi_stmt (bsi);
+ use_optype uses;
+ size_t j;
+
+ get_stmt_operands (stmt);
+ uses = STMT_USE_OPS (stmt);
+ for (j = 0; j < NUM_USES (uses); j++)
+ {
+ size_t k;
+ tree *use = USE_OP_PTR (uses, j);
+ for (k = 0; k < VARRAY_ACTIVE_SIZE (old_ivs); k++)
+ {
+ tree oldiv = VARRAY_TREE (old_ivs, k);
+ if (SSA_NAME_VAR (*use) == SSA_NAME_VAR (oldiv))
+ {
+ tree newiv, stmts;
+ lambda_body_vector lbv;
+
+ /* Compute the new expression for the induction
+ variable. */
+ depth = VARRAY_ACTIVE_SIZE (new_ivs);
+ lbv = lambda_body_vector_new (depth);
+ LBV_COEFFICIENTS (lbv)[k] = 1;
+ lbv = lambda_body_vector_compute_new (transform, lbv);
+ newiv = lbv_to_gcc_expression (lbv, new_ivs, &stmts);
+
+ /* Insert the statements to build that
+ expression. */
+ bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
+
+ /* Replace the use with the result of that
+ expression. */
+ if (tree_dump_file)
+ {
+ fprintf (tree_dump_file,
+ "Replacing induction variable use of ");
+ print_generic_stmt (tree_dump_file, *use, 0);
+ fprintf (tree_dump_file, " with ");
+ print_generic_stmt (tree_dump_file, newiv, 0);
+ fprintf (tree_dump_file, "\n");
+ }
+ *use = newiv;
+ }
+ }
+
+ }
+ }
+}
+
Index: lambda-mat.c
===================================================================
RCS file: lambda-mat.c
diff -N lambda-mat.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ lambda-mat.c 4 Mar 2004 21:30:48 -0000
@@ -0,0 +1,568 @@
+/* Integer matrix math routines
+ Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+ Contributed by Daniel Berlin <dberlin@dberlin.org>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA. */
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "ggc.h"
+#include "varray.h"
+#include "lambda.h"
+
+static void lambda_matrix_get_column (lambda_matrix, int, int,
+ lambda_vector);
+
+/* Allocate a matrix of M rows x N cols. */
+
+lambda_matrix
+lambda_matrix_new (int m, int n)
+{
+ lambda_matrix mat;
+ int i;
+
+ mat = ggc_alloc_cleared (m * sizeof (int));
+
+ for (i = 0; i < m; i++)
+ mat[i] = lambda_vector_new (n);
+
+ return mat;
+}
+
+/* Copy the elements of M x N matrix MAT1 to MAT2. */
+
+void
+lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
+ int m, int n)
+{
+ int i;
+
+ for (i = 0; i < m; i++)
+ lambda_vector_copy (mat1[i], mat2[i], n);
+}
+
+/* Store the N x N identity matrix in MAT. */
+
+void
+lambda_matrix_id (lambda_matrix mat, int size)
+{
+ int i, j;
+
+ for (i = 0; i < size; i++)
+ for (j = 0; j < size; j++)
+ mat[i][j] = (i == j) ? 1 : 0;
+}
+
+/* Negate the elements of the M x N matrix MAT1 and store it in MAT2. */
+
+void
+lambda_matrix_negate (lambda_matrix mat1, lambda_matrix mat2, int m, int n)
+{
+ int i;
+
+ for (i = 0; i < m; i++)
+ lambda_vector_negate (mat1[i], mat2[i], n);
+}
+
+/* Take the transpose of matrix MAT1 and store it in MAT2.
+ MAT1 is an M x N matrix, so MAT2 must be N x M. */
+
+void
+lambda_matrix_transpose (lambda_matrix mat1, lambda_matrix mat2, int m, int n)
+{
+ int i, j;
+
+ for (i = 0; i < n; i++)
+ for (j = 0; j < m; j++)
+ mat2[i][j] = mat1[j][i];
+}
+
+
+/* Add two M x N matrices together: MAT3 = MAT1+MAT2. */
+
+void
+lambda_matrix_add (lambda_matrix mat1, lambda_matrix mat2,
+ lambda_matrix mat3, int m, int n)
+{
+ int i;
+
+ for (i = 0; i < m; i++)
+ lambda_vector_add (mat1[i], mat2[i], mat3[i], n);
+}
+
+/* MAT3 = CONST1 * MAT1 + CONST2 * MAT2. All matrices are M x N. */
+
+void
+lambda_matrix_add_mc (lambda_matrix mat1, int const1,
+ lambda_matrix mat2, int const2,
+ lambda_matrix mat3, int m, int n)
+{
+ int i;
+
+ for (i = 0; i < m; i++)
+ lambda_vector_add_mc (mat1[i], const1, mat2[i], const2, mat3[i], n);
+}
+
+/* Multiply two matrices: MAT3 = MAT1 * MAT2.
+ MAT1 is an M x R matrix, and MAT2 is R x N. The resulting MAT2
+ must therefore be M x N. */
+
+void
+lambda_matrix_mult (lambda_matrix mat1, lambda_matrix mat2,
+ lambda_matrix mat3, int m, int r, int n)
+{
+
+ int i, j, k;
+ lambda_vector row1, row3;
+
+ for (i = 0; i < m; i++)
+ {
+ row3 = mat3[i];
+ for (j = 0; j < n; j++)
+ {
+ row1 = mat1[i];
+ row3[j] = 0;
+ for (k = 0; k < r; k++)
+ row3[j] += row1[k] * mat2[k][j];
+ }
+ }
+}
+
+/* Get column COL from the matrix MAT and store it in VEC. MAT has
+ N rows, so the length of VEC must be N. */
+
+static void
+lambda_matrix_get_column (lambda_matrix mat, int n, int col,
+ lambda_vector vec)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ vec[i] = mat[i][col];
+}
+
+/* Delete rows r1 to r2 (not including r2). TODO */
+
+void
+lambda_matrix_delete_rows (lambda_matrix mat, int rows, int from, int to)
+{
+ int i, d;
+ d = to - from;
+
+ for (i = to; i < rows; i++)
+ mat[i - d] = mat[i];
+
+ for (i = rows - d; i < rows; i++)
+ mat[i] = NULL;
+}
+
+/* Swap rows R1 and R2 in matrix MAT. */
+
+void
+lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
+{
+ lambda_vector row;
+
+ row = mat[r1];
+ mat[r1] = mat[r2];
+ mat[r2] = row;
+}
+
+/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
+ R2 = R2 + CONST1 * R1. */
+
+void
+lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
+{
+ int i;
+ lambda_vector row1, row2;
+
+ if (const1 == 0)
+ return;
+
+ row1 = mat[r1];
+ row2 = mat[r2];
+
+ for (i = 0; i < n; i++)
+ row2[i] += const1 * row1[i];
+}
+
+/* Negate row R1 of matrix MAT which has N columns. */
+
+void
+lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
+{
+ lambda_vector_negate (mat[r1], mat[r1], n);
+}
+
+/* Multiply row R1 of matrix MAT with N columns by CONST1. */
+
+void
+lambda_matrix_row_mc (lambda_matrix mat, int n, int r1, int const1)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ mat[r1][i] *= const1;
+}
+
+/* Exchange COL1 and COL2 in matrix MAT. M is the number of rows. */
+
+void
+lambda_matrix_col_exchange (lambda_matrix mat, int m, int col1, int col2)
+{
+ lambda_vector row;
+ int i;
+ int tmp;
+ for (i = 0; i < m; i++)
+ {
+ row = mat[i];
+ tmp = row[col1];
+ row[col1] = row[col2];
+ row[col2] = tmp;
+ }
+}
+
+/* Add a multiple of column C1 of matrix MAT with M rows to column C2:
+ C2 = C1 + CONST1 * C2. */
+
+void
+lambda_matrix_col_add (lambda_matrix mat, int m, int c1, int c2, int const1)
+{
+ int i;
+
+ if (const1 == 0)
+ return;
+
+ for (i = 0; i < m; i++)
+ mat[i][c2] += const1 * mat[i][c1];
+}
+
+/* Negate column C1 of matrix MAT which has M rows. */
+
+void
+lambda_matrix_col_negate (lambda_matrix mat, int m, int c1)
+{
+ int i;
+
+ for (i = 0; i < m; i++)
+ mat[i][c1] *= -1;
+}
+
+/* Multiply column C1 of matrix MAT with M rows by CONST1. */
+
+void
+lambda_matrix_col_mc (lambda_matrix mat, int m, int c1, int const1)
+{
+ int i;
+
+ for (i = 0; i < m; i++)
+ mat[i][c1] *= const1;
+}
+
+/* Compute the inverse of the N x N matrix MAT and store it in INV.
+
+ We don't _really_ compute the inverse of MAT. Instead we compute
+ det(MAT)*inv(MAT), and we return det(MAT) to the caller as the function
+ result. This is necessary to preserve accuracy, because we are dealing
+ with integer matrices here.
+
+ The algorithm used here is a column based Gauss-Jordan elimination on MAT
+ and the identity matrix in parallel. The inverse is the result of applying
+ the same operations on the identity matrix that reduce MAT to the identity
+ matrix.
+
+ When MAT is a 2 x 2 matrix, we don't go through the whole process, because
+ it is easily inverted by inspection and it is a very common case. */
+
+static int lambda_matrix_inverse_hard (lambda_matrix, lambda_matrix, int);
+
+int
+lambda_matrix_inverse (lambda_matrix mat, lambda_matrix inv, int n)
+{
+ if (n == 2)
+ {
+ int a, b, c, d, det;
+ a = mat[0][0];
+ b = mat[1][0];
+ c = mat[0][1];
+ d = mat[1][1];
+ inv[0][0] = d;
+ inv[0][1] = -c;
+ inv[1][0] = -b;
+ inv[1][1] = a;
+ det = (a * d - b * c);
+ if (det < 0)
+ {
+ det *= -1;
+ inv[0][0] *= -1;
+ inv[1][0] *= -1;
+ inv[0][1] *= -1;
+ inv[1][1] *= -1;
+ }
+ return det;
+ }
+ else
+ return lambda_matrix_inverse_hard (mat, inv, n);
+}
+
+/* If MAT is not a special case, invert it the hard way. */
+
+static int
+lambda_matrix_inverse_hard (lambda_matrix mat, lambda_matrix inv, int n)
+{
+ lambda_vector row;
+ lambda_matrix temp;
+ int i, j;
+ int determinant;
+
+ temp = lambda_matrix_new (n, n);
+ lambda_matrix_copy (mat, temp, n, n);
+ lambda_matrix_id (inv, n);
+
+ /* Reduce TEMP to a lower triangular form, applying the same operations on
+ INV which starts as the identity matrix. N is the number of rows and
+ columns. */
+ for (j = 0; j < n; j++)
+ {
+ row = temp[j];
+
+ /* Make every element in the current row positive. */
+ for (i = j; i < n; i++)
+ if (row[i] < 0)
+ {
+ lambda_matrix_col_negate (temp, n, i);
+ lambda_matrix_col_negate (inv, n, i);
+ }
+
+ /* Sweep the upper triangle. Stop when only the diagonal element in the
+ current row is nonzero. */
+ while (lambda_vector_first_nz (row, n, j + 1) < n)
+ {
+ int min_col = lambda_vector_min_nz (row, n, j);
+ lambda_matrix_col_exchange (temp, n, j, min_col);
+ lambda_matrix_col_exchange (inv, n, j, min_col);
+
+ for (i = j + 1; i < n; i++)
+ {
+ int factor;
+
+ factor = -1 * row[i];
+ if (row[j] != 1)
+ factor /= row[j];
+
+ lambda_matrix_col_add (temp, n, j, i, factor);
+ lambda_matrix_col_add (inv, n, j, i, factor);
+ }
+ }
+ }
+
+ /* Reduce TEMP from a lower triangular to the identity matrix. Also compute
+ the determinant, which now is simply the product of the elements on the
+ diagonal of TEMP. If one of these elements is 0, the matrix has 0 as an
+ eigenvalue so it is singular and hence not invertible. */
+ determinant = 1;
+ for (j = n - 1; j >= 0; j--)
+ {
+ int diagonal;
+
+ row = temp[j];
+ diagonal = row[j];
+
+ /* If the matrix is singular, abort. */
+ if (diagonal == 0)
+ abort ();
+
+ determinant = determinant * diagonal;
+
+ /* If the diagonal is not 1, then multiply the each row by the
+ diagonal so that the middle number is now 1, rather than a
+ rational. */
+ if (diagonal != 1)
+ {
+ for (i = 0; i < j; i++)
+ lambda_matrix_col_mc (inv, n, i, diagonal);
+ for (i = j + 1; i < n; i++)
+ lambda_matrix_col_mc (inv, n, i, diagonal);
+
+ row[j] = diagonal = 1;
+ }
+
+ /* Sweep the lower triangle column wise. */
+ for (i = j - 1; i >= 0; i--)
+ {
+ if (row[i])
+ {
+ int factor = -row[i];
+ lambda_matrix_col_add (temp, n, j, i, factor);
+ lambda_matrix_col_add (inv, n, j, i, factor);
+ }
+
+ }
+ }
+
+ return determinant;
+}
+
+/* Decompose mat to a product of a lower triangular and a unimodular
+ matrix. */
+
+void
+lambda_matrix_hermite (lambda_matrix mat, int n,
+ lambda_matrix H, lambda_matrix U)
+{
+ lambda_vector row;
+ int i, j, factor, minimum_col;
+
+ lambda_matrix_copy (mat, H, n, n);
+ lambda_matrix_id (U, n);
+
+ for (j = 0; j < n; j++)
+ {
+ row = H[j];
+
+ /* Make every element of H[j][j..n] positive. */
+ for (i = j; i < n; i++)
+ {
+ if (row[i] < 0)
+ {
+ lambda_matrix_col_negate (H, n, i);
+ lambda_vector_negate (U[i], U[i], n);
+ }
+ }
+
+ /* Stop when only the diagonal element is non-zero. */
+ while (lambda_vector_first_nz (row, n, j + 1) < n)
+ {
+ minimum_col = lambda_vector_min_nz (row, n, j);
+ lambda_matrix_col_exchange (H, n, j, minimum_col);
+ lambda_matrix_row_exchange (U, j, minimum_col);
+
+ for (i = j + 1; i < n; i++)
+ {
+ factor = row[i] / row[j];
+ lambda_matrix_col_add (H, n, j, i, -1 * factor);
+ lambda_matrix_row_add (U, n, i, j, factor);
+ }
+ }
+ }
+}
+
+/* Find the first non-zero vector in mat, if found.
+ return rowsize if not found. */
+
+int
+lambda_matrix_first_nz_vec (lambda_matrix mat, int rowsize, int colsize,
+ int startrow)
+{
+
+ int j;
+ bool found = false;
+
+ for (j = startrow; (j < rowsize) && !found; j++)
+ {
+ if ((mat[j] != NULL)
+ && (lambda_vector_first_nz (mat[j], colsize, startrow) < colsize))
+ found = true;
+ }
+
+ if (found)
+ return j - 1;
+ return rowsize;
+}
+
+/* Calculate the projection of E sub k to the null space of B. */
+
+void
+lambda_matrix_project_to_null (lambda_matrix B, int rowsize,
+ int colsize, int k, lambda_vector x)
+{
+ lambda_matrix M1, M2, M3, I;
+ int determinant;
+
+ /* compute c(I-B^T inv(B B^T) B) e sub k */
+
+ /* M1 is the transpose of B */
+ M1 = lambda_matrix_new (colsize, colsize);
+ lambda_matrix_transpose (B, M1, rowsize, colsize);
+
+ /* M2 = B * B^T */
+ M2 = lambda_matrix_new (colsize, colsize);
+ lambda_matrix_mult (B, M1, M2, rowsize, colsize, rowsize);
+
+ /* M3 = inv(M2) */
+ M3 = lambda_matrix_new (colsize, colsize);
+ determinant = lambda_matrix_inverse (M2, M3, rowsize);
+
+ /* M2 = B^T (inv(B B^T)) */
+ lambda_matrix_mult (M1, M3, M2, colsize, rowsize, rowsize);
+
+ /* M1 = B^T (inv(B B^T)) B */
+ lambda_matrix_mult (M2, B, M1, colsize, rowsize, colsize);
+ lambda_matrix_negate (M1, M1, colsize, colsize);
+
+ I = lambda_matrix_new (colsize, colsize);
+ lambda_matrix_id (I, colsize);
+
+ lambda_matrix_add_mc (I, determinant, M1, 1, M2, colsize, colsize);
+
+ lambda_matrix_get_column (M2, colsize, k - 1, x);
+
+}
+
+/* Multiply a vector VEC by a matrix MAT.
+ MAT is an M*N matrix, and VEC is a vector with length N. The result
+ is stored in DEST which must be a vector of length M. */
+
+void
+lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
+ lambda_vector vec, lambda_vector dest)
+{
+ int i, j;
+
+ lambda_vector_clear (dest, m);
+ for (i = 0; i < m; i++)
+ for (j = 0; j < n; j++)
+ dest[i] += matrix[i][j] * vec[j];
+}
+
+/* Print out a vector VEC of length N to OUTFILE. */
+
+void
+print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ fprintf (outfile, "%3d ", vector[i]);
+ fprintf (outfile, "\n");
+}
+
+/* Print out an M x N matrix MAT to OUTFILE. */
+
+void
+print_lambda_matrix (FILE * outfile, lambda_matrix matrix, int m, int n)
+{
+ int i;
+
+ for (i = 0; i < m; i++)
+ print_lambda_vector (outfile, matrix[i], n);
+ fprintf (outfile, "\n");
+}
+
Index: lambda-trans.c
===================================================================
RCS file: lambda-trans.c
diff -N lambda-trans.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ lambda-trans.c 4 Mar 2004 21:30:48 -0000
@@ -0,0 +1,294 @@
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "errors.h"
+#include "ggc.h"
+#include "tree.h"
+#include "target.h"
+#include "varray.h"
+#include "lambda.h"
+
+/* Allocate a new transformation matrix. */
+
+lambda_trans_matrix
+lambda_trans_matrix_new (int colsize, int rowsize)
+{
+ lambda_trans_matrix ret;
+
+ ret = xcalloc (1, sizeof (*ret));
+ LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize);
+ LTM_ROWSIZE (ret) = rowsize;
+ LTM_COLSIZE (ret) = colsize;
+ LTM_DENOMINATOR (ret) = 1;
+ return ret;
+}
+
+/* Return true if the transformation matrix is nonsingular. */
+
+bool
+lambda_trans_matrix_is_nonsingular (lambda_trans_matrix t)
+{
+ return lambda_trans_matrix_is_fullrank (t);
+}
+
+
+/* Return true if the transformation matrix is full row rank. */
+
+bool
+lambda_trans_matrix_is_fullrank (lambda_trans_matrix t)
+{
+ return (lambda_trans_matrix_rank (t) == LTM_ROWSIZE (t));
+}
+
+/* Compute the rank of the matrix. */
+
+int
+lambda_trans_matrix_rank (lambda_trans_matrix t)
+{
+ lambda_matrix partial;
+ int rowsize, colsize;
+ int i, j, nextrow;
+
+ lambda_matrix tempmatrix;
+ lambda_vector row;
+ int minimum_column, factor;
+
+ partial = LTM_MATRIX (t);
+ rowsize = LTM_ROWSIZE (t);
+ colsize = LTM_COLSIZE (t);
+
+ tempmatrix = lambda_matrix_new (rowsize, colsize);
+ lambda_matrix_copy (partial, tempmatrix, rowsize, colsize);
+
+ j = 0;
+ while ((j < colsize) && (j < rowsize))
+ {
+ /* Considering the submatrix A[j:m, j:n], search for the first
+ row k >= k such that A[k, j:n] != 0 */
+
+ nextrow = lambda_matrix_first_nz_vec (tempmatrix, rowsize, colsize, j);
+
+ if (nextrow != j)
+ return j;
+
+ /* Delete rows j .. nextrow - 1 */
+
+ lambda_matrix_delete_rows (tempmatrix, rowsize, j, nextrow);
+ lambda_matrix_delete_rows (partial, rowsize, j, nextrow);
+
+ rowsize = rowsize - nextrow + j;
+
+ /* Nextrow becomes row j+1 in the matrix, but not necessary row
+ j + 1 in the array. */
+
+ /* Apply elementary column operations to make the diagonal
+ element non-zero and the others zero. */
+
+ row = tempmatrix[j];
+
+ /* Make every element of tempmatrix[j, j:colsize] positive. */
+
+ for (i = j; i < colsize; i++)
+ if (row[i] < 0)
+ lambda_matrix_col_negate (tempmatrix, rowsize, i);
+
+ /* Stop only when the diagonal element is non-zero. */
+ while (lambda_vector_first_nz (row, colsize, j+1) < colsize)
+ {
+ minimum_column = lambda_vector_min_nz (row, colsize, j);
+ lambda_matrix_col_exchange (tempmatrix, rowsize, j, minimum_column);
+
+ for (i = j + 1; i < colsize; i++)
+ {
+ if (row[i])
+ {
+ factor = row[i] / row[j];
+ /* Add (-1) * factor of column j to column i. */
+ lambda_matrix_col_add (tempmatrix, rowsize,
+ j, i, (-1) * factor);
+ }
+ }
+ }
+ j++;
+ }
+
+ return rowsize;
+}
+
+
+/* Compute the base matrix. */
+
+lambda_trans_matrix
+lambda_trans_matrix_base (lambda_trans_matrix mat)
+{
+ int rowsize, colsize;
+ int i, j, nextrow;
+ lambda_matrix partial, tempmatrix;
+ lambda_vector row;
+ int minimum_column, factor;
+
+ lambda_trans_matrix base;
+
+ rowsize = LTM_ROWSIZE (mat);
+ colsize = LTM_COLSIZE (mat);
+ base = lambda_trans_matrix_new (rowsize, colsize);
+ partial = LTM_MATRIX (base);
+ lambda_matrix_copy (LTM_MATRIX (mat), partial, rowsize, colsize);
+ tempmatrix = lambda_matrix_new (rowsize, colsize);
+ lambda_matrix_copy (partial, tempmatrix, rowsize, colsize);
+
+ j = 0;
+
+ while ((j < colsize)
+ && (nextrow = lambda_matrix_first_nz_vec (tempmatrix,
+ rowsize,
+ colsize, j)) < rowsize)
+ {
+ /* Consider the submatrix A[j:m, j:n].
+ Search for the first row k >= j such that A[k, j:n] != 0. */
+
+ /* Delete rows j .. nextrow - 1. */
+ lambda_matrix_delete_rows (tempmatrix, rowsize, j, nextrow);
+ lambda_matrix_delete_rows (partial, rowsize, j, nextrow);
+
+ /* nextrow becomes row j+1 in the matrix, though not necessarily
+ row j+1 in the array. */
+ /* Apply elementary column oeprations to make the diagonal
+ element nonzero and the others zero. */
+ row = tempmatrix[j];
+
+ /* Make every element of tempmatrix[j, j:colsize] positive. */
+
+ for (i = j; i < colsize; i++)
+ if (row[i] < 0)
+ lambda_matrix_col_negate (tempmatrix, rowsize, i);
+
+ /* Stop when only the diagonal element is nonzero. */
+
+ while (lambda_vector_first_nz (row, colsize, j+1) < colsize)
+ {
+ minimum_column = lambda_vector_min_nz (row, colsize, j);
+ lambda_matrix_col_exchange (tempmatrix, rowsize, j, minimum_column);
+
+ for (i = j + 1; i < colsize; i++)
+ {
+ if (row[i])
+ {
+ factor = row[i] / row[j];
+ /* Add (-1) * factor of column j to column i. */
+ lambda_matrix_col_add (tempmatrix, rowsize,
+ j, i, (-1) * factor);
+ }
+ }
+ }
+ j++;
+ }
+ /* Store the rank. */
+ LTM_ROWSIZE (base) = j;
+ return base;
+}
+
+/* Pad the legal base matrix to an invertable matrix. */
+
+lambda_trans_matrix
+lambda_trans_matrix_padding (lambda_trans_matrix matrix)
+{
+ int i, k;
+ int currrow, minimum_column, factor;
+
+ lambda_matrix tempmatrix, padmatrix;
+ lambda_vector row;
+
+ lambda_trans_matrix padded;
+ lambda_matrix partial;
+ int rowsize, colsize;
+
+ rowsize = LTM_ROWSIZE (matrix);
+ colsize = LTM_COLSIZE (matrix);
+
+ padded = lambda_trans_matrix_new (rowsize, colsize);
+ partial = LTM_MATRIX (padded);
+ lambda_matrix_copy(LTM_MATRIX (matrix), partial, rowsize, colsize);
+
+ /* full rank, no need for padding */
+ if (rowsize==colsize)
+ return(padded);
+
+ tempmatrix = lambda_matrix_new (rowsize, colsize);
+ lambda_matrix_copy (partial, tempmatrix, rowsize, colsize);
+
+ padmatrix = lambda_matrix_new (colsize, colsize);
+ lambda_matrix_id (padmatrix, colsize);
+
+ for(currrow = 0; currrow < rowsize; currrow++)
+ {
+ /* consider the submatrix A[i:m, i:n]. */
+
+ /* apply elementary column operations to make the diag element nonzero
+ and others zero. */
+
+ /* only consider columns from currrow to colsize. */
+
+ row = tempmatrix[currrow];
+
+ /* make every element of tempmatrix[currrow, currrow:colsize]
+ positive. */
+
+ for(i = currrow; i < colsize; i++)
+ if(row[i] < 0)
+ lambda_matrix_col_negate (tempmatrix, rowsize, i);
+
+ /* stop when only the diagonal element is nonzero */
+ while (lambda_vector_first_nz (row, colsize, currrow + 1) < colsize)
+ {
+ minimum_column = lambda_vector_min_nz (row, colsize, currrow);
+
+ lambda_matrix_col_exchange (tempmatrix, rowsize, currrow,
+ minimum_column);
+ lambda_matrix_row_exchange (padmatrix, currrow, minimum_column);
+
+ for (i = currrow + 1; i < colsize; i++)
+ {
+ if(row[i])
+ {
+ factor = row[i] / row[currrow];
+ lambda_matrix_col_add (tempmatrix, rowsize,
+ currrow, i, (-1) * factor);
+ }
+ }
+ }
+ }
+
+
+ for(k = rowsize; k < colsize; k++)
+ partial[k] = padmatrix[k];
+
+ return(padded);
+}
+
+/* Compute the inverse of the transformation. */
+
+lambda_trans_matrix
+lambda_trans_matrix_inverse (lambda_trans_matrix mat)
+{
+ lambda_trans_matrix inverse;
+ int determinant;
+
+ inverse = lambda_trans_matrix_new (LTM_ROWSIZE (mat), LTM_COLSIZE (mat));
+ determinant = lambda_matrix_inverse (LTM_MATRIX (mat), LTM_MATRIX (inverse),
+ LTM_ROWSIZE (mat));
+ LTM_DENOMINATOR (inverse) = determinant;
+ return inverse;
+}
+
+
+/* Print out a transformation matrix. */
+
+void
+print_lambda_trans_matrix (FILE *outfile, lambda_trans_matrix mat)
+{
+ print_lambda_matrix (outfile, LTM_MATRIX (mat), LTM_ROWSIZE (mat),
+ LTM_COLSIZE (mat));
+}
Index: lambda.h
===================================================================
RCS file: lambda.h
diff -N lambda.h
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ lambda.h 4 Mar 2004 21:30:48 -0000
@@ -0,0 +1,301 @@
+#ifndef LAMBDA_H
+#define LAMBDA_H
+
+typedef int *lambda_vector;
+typedef lambda_vector *lambda_matrix;
+
+/* A transformation matrix. */
+typedef struct
+{
+ lambda_matrix matrix;
+ int rowsize;
+ int colsize;
+ int denominator;
+} *lambda_trans_matrix;
+#define LTM_MATRIX(T) ((T)->matrix)
+#define LTM_ROWSIZE(T) ((T)->rowsize)
+#define LTM_COLSIZE(T) ((T)->colsize)
+#define LTM_DENOMINATOR(T) ((T)->denominator)
+
+/* A vector representing a statement in the body of a loop. */
+typedef struct
+{
+ lambda_vector coefficients;
+ int size;
+ int denominator;
+} *lambda_body_vector;
+#define LBV_COEFFICIENTS(T) ((T)->coefficients)
+#define LBV_SIZE(T) ((T)->size)
+#define LBV_DENOMINATOR(T) ((T)->denominator)
+
+/* Piecewise linear expression. */
+typedef struct lambda_linear_expression_s
+{
+ lambda_vector coefficients;
+ int constant;
+ lambda_vector invariant_coefficients;
+ int denominator;
+ struct lambda_linear_expression_s *next;
+} *lambda_linear_expression;
+
+#define LLE_COEFFICIENTS(T) ((T)->coefficients)
+#define LLE_CONSTANT(T) ((T)->constant)
+#define LLE_INVARIANT_COEFFICIENTS(T) ((T)->invariant_coefficients)
+#define LLE_DENOMINATOR(T) ((T)->denominator)
+#define LLE_NEXT(T) ((T)->next)
+
+lambda_linear_expression lambda_linear_expression_new (int, int);
+void print_lambda_linear_expression (FILE *, lambda_linear_expression, int,
+ int, char);
+
+/* Loop structure. */
+typedef struct lambda_loop_s
+{
+ lambda_linear_expression lower_bound;
+ lambda_linear_expression upper_bound;
+ lambda_linear_expression linear_offset;
+ int step;
+} *lambda_loop;
+
+#define LL_LOWER_BOUND(T) ((T)->lower_bound)
+#define LL_UPPER_BOUND(T) ((T)->upper_bound)
+#define LL_LINEAR_OFFSET(T) ((T)->linear_offset)
+#define LL_STEP(T) ((T)->step)
+
+/* Loop nest structure. */
+typedef struct
+{
+ lambda_loop *loops;
+ int depth;
+ int invariants;
+} *lambda_loopnest;
+
+#define LN_LOOPS(T) ((T)->loops)
+#define LN_DEPTH(T) ((T)->depth)
+#define LN_INVARIANTS(T) ((T)->invariants)
+
+lambda_loopnest lambda_loopnest_new (int, int);
+lambda_loopnest lambda_loopnest_transform (lambda_loopnest, lambda_trans_matrix);
+void print_lambda_loopnest (FILE *, lambda_loopnest, char);
+
+#define lambda_loop_new() (lambda_loop) ggc_alloc_cleared (sizeof (struct lambda_loop_s))
+
+void print_lambda_loop (FILE *, lambda_loop, int, int, char);
+
+void print_lambda_vector (FILE *, lambda_vector, int);
+
+lambda_matrix lambda_matrix_new (int, int);
+
+void lambda_matrix_id (lambda_matrix, int);
+void lambda_matrix_copy (lambda_matrix, lambda_matrix, int, int);
+void lambda_matrix_negate (lambda_matrix, lambda_matrix, int, int);
+void lambda_matrix_transpose (lambda_matrix, lambda_matrix, int, int);
+void lambda_matrix_add (lambda_matrix, lambda_matrix, lambda_matrix, int,
+ int);
+void lambda_matrix_add_mc (lambda_matrix, int, lambda_matrix, int,
+ lambda_matrix, int, int);
+void lambda_matrix_mult (lambda_matrix, lambda_matrix, lambda_matrix,
+ int, int, int);
+void lambda_matrix_delete_rows (lambda_matrix, int, int, int);
+void lambda_matrix_row_exchange (lambda_matrix, int, int);
+void lambda_matrix_row_add (lambda_matrix, int, int, int, int);
+void lambda_matrix_row_negate (lambda_matrix mat, int, int);
+void lambda_matrix_row_mc (lambda_matrix, int, int, int);
+void lambda_matrix_col_exchange (lambda_matrix, int, int, int);
+void lambda_matrix_col_add (lambda_matrix, int, int, int, int);
+void lambda_matrix_col_negate (lambda_matrix, int, int);
+void lambda_matrix_col_mc (lambda_matrix, int, int, int);
+int lambda_matrix_inverse (lambda_matrix, lambda_matrix, int);
+void lambda_matrix_hermite (lambda_matrix, int, lambda_matrix, lambda_matrix);
+int lambda_matrix_first_nz_vec (lambda_matrix, int, int, int);
+void lambda_matrix_project_to_null (lambda_matrix, int, int, int,
+ lambda_vector);
+void print_lambda_matrix (FILE *, lambda_matrix, int, int);
+
+lambda_trans_matrix lambda_trans_matrix_new (int, int);
+bool lambda_trans_matrix_is_nonsingular (lambda_trans_matrix);
+bool lambda_trans_matrix_is_fullrank (lambda_trans_matrix);
+int lambda_trans_matrix_rank (lambda_trans_matrix);
+lambda_trans_matrix lambda_trans_matrix_base (lambda_trans_matrix);
+lambda_trans_matrix lambda_trans_matrix_padding (lambda_trans_matrix);
+lambda_trans_matrix lambda_trans_matrix_inverse (lambda_trans_matrix);
+void print_lambda_trans_matrix (FILE *, lambda_trans_matrix);
+void lambda_matrix_vector_mult (lambda_matrix, int, int, lambda_vector,
+ lambda_vector);
+
+lambda_body_vector lambda_body_vector_new (int);
+lambda_body_vector lambda_body_vector_compute_new (lambda_trans_matrix,
+ lambda_body_vector);
+void print_lambda_body_vector (FILE *, lambda_body_vector);
+struct loop;
+lambda_loopnest gcc_loopnest_to_lambda_loopnest (struct loop *,
+ varray_type *);
+void lambda_loopnest_to_gcc_loopnest (struct loop *, varray_type,
+ lambda_loopnest,
+ lambda_trans_matrix);
+
+
+static inline void lambda_vector_negate (lambda_vector, lambda_vector, int);
+static inline void lambda_vector_mult_const (lambda_vector, lambda_vector, int, int);
+static inline void lambda_vector_add (lambda_vector, lambda_vector,
+ lambda_vector, int);
+static inline void lambda_vector_add_mc (lambda_vector, int, lambda_vector, int,
+ lambda_vector, int);
+static inline void lambda_vector_copy (lambda_vector, lambda_vector, int);
+static inline bool lambda_vector_zerop (lambda_vector, int);
+static inline void lambda_vector_clear (lambda_vector, int);
+static inline bool lambda_vector_equal (lambda_vector, lambda_vector, int);
+static inline int lambda_vector_min_nz (lambda_vector, int, int);
+static inline int lambda_vector_first_nz (lambda_vector, int, int);
+
+/* Allocate a new vector of given SIZE. */
+
+static inline lambda_vector
+lambda_vector_new (int size)
+{
+ return ggc_alloc_cleared (size * sizeof(int));
+}
+
+/* Negate vector VEC1 with length SIZE and store it in VEC2. */
+
+static inline void
+lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
+ int size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ if (vec1[i] != 0)
+ vec2[i] = (-1) * vec1[i];
+}
+
+/* Multiply vector VEC1 of length SIZE by a constant CONST1,
+ and store the result in VEC2. */
+
+static inline void
+lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
+ int size, int const1)
+{
+ int i;
+
+ if (const1 == 0)
+ lambda_vector_clear (vec2, size);
+ else
+ for (i = 0; i < size; i++)
+ vec2[i] = const1 * vec1[i];
+}
+
+/* VEC3 = VEC1+VEC2, where all three the vectors are of length SIZE. */
+
+static inline void
+lambda_vector_add (lambda_vector vec1, lambda_vector vec2,
+ lambda_vector vec3, int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ vec3[i] = vec1[i] + vec2[i];
+}
+
+/* VEC3 = CONSTANT1*VEC1 + CONSTANT2*VEC2. All vectors have length SIZE. */
+
+static inline void
+lambda_vector_add_mc (lambda_vector vec1, int const1,
+ lambda_vector vec2, int const2,
+ lambda_vector vec3, int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ vec3[i] = const1 * vec1[i] + const2 * vec2[i];
+}
+
+/* Copy the elements of vector VEC1 with length SIZE to VEC2. */
+
+static inline void
+lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
+ int size)
+{
+ memcpy (vec2, vec1, size * sizeof (int));
+}
+
+/* Return true if vector VEC1 of length SIZE is the zero vector. */
+
+static inline bool
+lambda_vector_zerop (lambda_vector vec1, int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ if (vec1[i] != 0)
+ return false;
+ return true;
+}
+
+/* Clear out vector VEC1 of length SIZE. */
+
+static inline void
+lambda_vector_clear (lambda_vector vec1, int size)
+{
+ memset (vec1, 0, size * sizeof (int));
+}
+
+/* Return true if two vectors are equal. */
+
+static inline bool
+lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ if (vec1[i] != vec2[i])
+ return false;
+ return true;
+}
+
+/* Return the minimum non-zero element in vector VEC1 between START and N.
+ We must have START <= N. */
+
+static inline int
+lambda_vector_min_nz (lambda_vector vec1, int n, int start)
+{
+ int j;
+ int min = -1;
+
+ for (j = start; j < n; j++)
+ {
+ if (vec1[j])
+ if (min < 0 || vec1[j] < vec1[min])
+ min= j;
+ }
+
+ if (min < 0)
+ abort ();
+
+ return min;
+}
+
+/* Return the first nonzero element of vector VEC1 between START and N.
+ We must have START <= N. Returns N if VEC1 is the zero vector. */
+
+static inline int
+lambda_vector_first_nz (lambda_vector vec1, int n, int start)
+{
+ int j;
+ for (j = start; j < n && vec1[j] == 0; j++) /* Nothing. */ ;
+ return j;
+}
+
+
+/* Multiply a vector by a matrix. */
+
+static inline void
+lambda_vector_matrix_mult (lambda_vector vect, int m, lambda_matrix mat,
+ int n, lambda_vector dest)
+{
+ int i, j;
+ memset (dest, 0, n * sizeof (int));
+ for (i = 0; i < n; i++)
+ for (j = 0; j < m; j++)
+ dest[i] += mat[j][i] * vect[j];
+}
+
+
+#endif /* LAMBDA_H */
+
Index: opts.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/opts.c,v
retrieving revision 1.31.2.22.2.8
diff -u -3 -p -w -B -b -r1.31.2.22.2.8 opts.c
--- opts.c 3 Mar 2004 18:42:14 -0000 1.31.2.22.2.8
+++ opts.c 4 Mar 2004 21:30:49 -0000
@@ -1467,6 +1467,10 @@ common_handle_option (size_t scode, cons
flag_all_data_deps = value;
break;


+    case OPT_ftree_loop_linear:
+      flag_tree_loop_linear = value;
+      break;
+
     case OPT_ftree_elim_checks:
       flag_tree_elim_checks = value;
       break;
Index: timevar.def
===================================================================
RCS file: /cvs/gcc/gcc/gcc/timevar.def,v
retrieving revision 1.14.2.28.2.7
diff -u -3 -p -w -B -b -r1.14.2.28.2.7 timevar.def
--- timevar.def	3 Mar 2004 18:42:14 -0000	1.14.2.28.2.7
+++ timevar.def	4 Mar 2004 21:30:49 -0000
@@ -85,6 +85,7 @@ DEFTIMEVAR (TV_TREE_DCE		     , "tree co
 DEFTIMEVAR (TV_TREE_CD_DCE	     , "tree aggressive DCE")
 DEFTIMEVAR (TV_TREE_DSE		     , "tree DSE")
 DEFTIMEVAR (TV_TREE_LOOP	     , "tree loop optimization")
+DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear transforms")
 DEFTIMEVAR (TV_TREE_LOOP_IVOPTS	     , "tree iv optimization")
 DEFTIMEVAR (TV_TREE_VECTORIZATION    , "tree loop vectorization")
 DEFTIMEVAR (TV_TREE_CH		     , "tree copy headers")
Index: tree-flow.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/tree-flow.h,v
retrieving revision 1.1.4.177.2.12
diff -u -3 -p -w -B -b -r1.1.4.177.2.12 tree-flow.h
--- tree-flow.h	24 Feb 2004 23:52:28 -0000	1.1.4.177.2.12
+++ tree-flow.h	4 Mar 2004 21:30:49 -0000
@@ -617,6 +617,7 @@ void create_iv (tree, tree, tree, struct
 void test_loop_versioning (struct loops *loops);
 bool tree_ssa_loop_version (struct loops *, struct loop *, tree);
 bool for_each_index (tree *, bool (*) (tree, tree *, void *), void *);
+void linear_transform_loops (struct loops *, varray_type);

/* In tree-flow-inline.h */
static inline int phi_arg_from_edge (tree, edge);
Index: tree-loop-linear.c
===================================================================
RCS file: tree-loop-linear.c
diff -N tree-loop-linear.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ tree-loop-linear.c 4 Mar 2004 21:30:49 -0000
@@ -0,0 +1,111 @@
+/* Linear Loop transforms
+ Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+ Contributed by Daniel Berlin <dberlin@dberlin.org>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA. */
+
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "errors.h"
+#include "ggc.h"
+#include "tree.h"
+#include "target.h"
+
+#include "rtl.h"
+#include "basic-block.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "tree-dump.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "tree-fold-const.h"
+#include "expr.h"
+#include "optabs.h"
+#include "tree-chrec.h"
+#include "tree-data-ref.h"
+#include "tree-scalar-evolution.h"
+#include "tree-pass.h"
+#include "varray.h"
+#include "lambda.h"
+
+/* Linear loop transforms include any composition of interchange,
+ scaling, skewing, and reversal. They are used to change the
+ iteration order of loop nests in order to optimize data locality of
+ traversals, or remove dependences that prevent
+ parallelization/vectorization/etc.
+
+ TODO: Determine reuse vectors/matrix and use it to determine optimal
+ transform matrix for locality purposes.
+ TODO: Add dependence matrix collection and approriate matrix
+ calculations so we can determine if a given transformation matrix
+ is legal for a loop.
+ TODO:
+*/
+
+/* Perform a set of linear transforms on LOOPS. */
+
+void
+linear_transform_loops (struct loops *loops,
+ varray_type ev_info ATTRIBUTE_UNUSED)
+{
+ unsigned int i;
+ for (i = 1; i < loops->num; i++)
+ {
+ struct loop *loop_nest = loops->parray[i];
+ varray_type oldivs;
+ lambda_loopnest before, after;
+ lambda_trans_matrix trans;
+
+ if (!loop_nest->inner)
+ continue;
+ flow_loop_scan (loop_nest, LOOP_ALL);
+ flow_loop_scan (loop_nest->inner, LOOP_ALL);
+ if (loop_nest->num_pre_header_edges != 1
+ || loop_nest->inner->num_pre_header_edges != 1)
+ continue;
+
+ before = gcc_loopnest_to_lambda_loopnest (loop_nest, &oldivs);
+ if (!before)
+ continue;
+ if (tree_dump_file)
+ {
+ fprintf (tree_dump_file, "Before:\n");
+ print_lambda_loopnest (tree_dump_file, before, 'i');
+ }
+ trans = lambda_trans_matrix_new (LN_DEPTH (before), LN_DEPTH (before));
+#if 1
+ lambda_matrix_id (LTM_MATRIX (trans), LN_DEPTH (before));
+#else
+ /* This is a 2x2 interchange matrix. */
+ LTM_MATRIX (trans)[0][0] = 0;
+ LTM_MATRIX (trans)[0][1] = 1;
+ LTM_MATRIX (trans)[1][0] = 1;
+ LTM_MATRIX (trans)[1][1] = 0;
+#endif
+ after = lambda_loopnest_transform (before, trans);
+ if (tree_dump_file)
+ {
+ fprintf (tree_dump_file, "After:\n");
+ print_lambda_loopnest (tree_dump_file, after, 'u');
+ }
+ lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, after, trans);
+ }
+}
Index: tree-optimize.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-optimize.c,v
retrieving revision 1.1.4.98.2.13
diff -u -3 -p -w -B -b -r1.1.4.98.2.13 tree-optimize.c
--- tree-optimize.c 3 Mar 2004 18:42:15 -0000 1.1.4.98.2.13
+++ tree-optimize.c 4 Mar 2004 21:30:49 -0000
@@ -325,6 +325,7 @@ init_tree_optimization_passes (void)
NEXT_PASS (pass_scev_anal);
NEXT_PASS (pass_scev_depend);
NEXT_PASS (pass_scev_elim_checks);
+ NEXT_PASS (pass_scev_linear_transform);
NEXT_PASS (pass_ddg);
NEXT_PASS (pass_scev_vectorize);
NEXT_PASS (pass_delete_ddg);
Index: tree-pass.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/tree-pass.h,v
retrieving revision 1.1.4.5
diff -u -3 -p -w -B -b -r1.1.4.5 tree-pass.h
--- tree-pass.h 3 Mar 2004 18:42:15 -0000 1.1.4.5
+++ tree-pass.h 4 Mar 2004 21:30:49 -0000
@@ -106,6 +106,7 @@ extern struct tree_opt_pass pass_scev;
extern struct tree_opt_pass pass_scev_init;
extern struct tree_opt_pass pass_scev_anal;
extern struct tree_opt_pass pass_scev_depend;
+extern struct tree_opt_pass pass_scev_linear_transform;
extern struct tree_opt_pass pass_scev_elim_checks;
extern struct tree_opt_pass pass_scev_vectorize;
extern struct tree_opt_pass pass_scev_done;
Index: tree-scalar-evolution.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/tree-scalar-evolution.c,v
retrieving revision 1.1.2.16
diff -u -3 -p -w -B -b -r1.1.2.16 tree-scalar-evolution.c
--- tree-scalar-evolution.c 3 Mar 2004 18:42:15 -0000 1.1.2.16
+++ tree-scalar-evolution.c 4 Mar 2004 21:30:50 -0000
@@ -2769,6 +2769,14 @@ scev_elim_checks (void)
eliminate_redundant_checks ();
}


+/* Runs the linear loop transformations.  */
+
+static void
+scev_linear_transform (void)
+{
+  linear_transform_loops (current_loops, *scalar_evolution_info);
+}
+
 /* Runs the vectorization pass.  */

 static void
@@ -2801,7 +2809,8 @@ gate_scev (void)
   return (flag_scalar_evolutions != 0
 	  || flag_tree_vectorize != 0
 	  || flag_all_data_deps != 0
-	  || flag_tree_elim_checks != 0);
+	  || flag_tree_elim_checks != 0
+	  || flag_tree_loop_linear != 0);
 }

 static bool
@@ -2823,6 +2832,13 @@ gate_scev_elim_checks (void)
 }

 static bool
+gate_scev_linear_transform (void)
+{
+  return current_loops && flag_tree_loop_linear != 0;
+}
+
+
+static bool
 gate_scev_vectorize (void)
 {
   return current_loops && flag_tree_vectorize != 0;
@@ -2907,6 +2923,23 @@ struct tree_opt_pass pass_scev_vectorize
   0,					/* todo_flags_start */
   TODO_dump_func | TODO_rename_vars	/* todo_flags_finish */
 };
+
+struct tree_opt_pass pass_scev_linear_transform =
+{
+  "ltrans",				/* name */
+  gate_scev_linear_transform,		/* gate */
+  scev_linear_transform,       		/* execute */
+  NULL,					/* sub */
+  NULL,					/* next */
+  0,					/* static_pass_number */
+  TV_TREE_LINEAR_TRANSFORM,  		/* tv_id */
+  PROP_cfg | PROP_ssa,			/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  TODO_dump_func                	/* todo_flags_finish */
+};
+

 struct tree_opt_pass pass_scev_elim_checks =
 {


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]