[patch, fortran, RFC] First steps towards inlining matmul


Hello world,

This is a first draft of a patch to inline matmul (PR 37171).  It is
preliminary, but functional as far as it goes.  Definitely material for
the next stage 1 :-)

Basically, it takes

c = matmul(a,b)

and converts this into

   BLOCK
     integer i,j,k
     c = 0
     do j=0, size(b,2)-1
       do k=0, size(a, 2)-1
         do i=0, size(a, 1)-1
            c(i * stride(c,1) + lbound(c,1), j * stride(c,2) + lbound(c,2)) =
            c(i * stride(c,1) + lbound(c,1), j * stride(c,2) + lbound(c,2)) +
            a(i * stride(a,1) + lbound(a,1), k * stride(a,2) + lbound(a,2)) *
            b(k * stride(b,1) + lbound(b,1), j * stride(b,2) + lbound(b,2))
         end do
       end do
     end do
   END BLOCK
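
To illustrate (this is hand-written, not actual -fdump-fortran-optimized
output), for explicit-shape arrays with the default lower bound of 1 and
unit strides, the bound and stride expressions fold away and the block
reduces to the usual triple loop:

   real :: a(2,3), b(3,4), c(2,4)

   BLOCK
     integer :: i, j, k
     c = 0
     do j = 0, 3
       do k = 0, 2
         do i = 0, 1
           c(i+1, j+1) = c(i+1, j+1) + a(i+1, k+1) * b(k+1, j+1)
         end do
       end do
     end do
   END BLOCK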

For this, I wrote something like a scalarizer that operates only on the
AST.  I find it rather straightforward to use (in contrast to the
one we have), but that may also be due to the fact that I wrote it
myself :-)

I chose zero-based loop variables because I didn't want to special-case
too many strange array bounds, and I trust the induction variable
optimizations and CSE in the middle end and the RTL optimizers to clean
up the resulting index calculations.  If this approach produces
suboptimal code somewhere, let me know.
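
As an example of the index calculation (hypothetical, just to show the
idea), for an assignment to a strided section

   c(1:9:2, :) = matmul(a, b)

the scalarized reference to c in the innermost loop becomes, schematically,

   c(i*2 + 1, j + lbound(c,2))

and strength reduction of the multiplication is left to those later passes.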

References are frozen (assigned to front-end temporaries) to make sure
that functions appearing in them are not evaluated more than once.
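
For example (the function f and the temporary's name are made up for
illustration), for

   c = matmul(a(f(n):, :), b)

the start expression of the section is pulled out into a front-end
temporary first, roughly

   __var_1_freeze = f(n)
   c = matmul(a(__var_1_freeze:, :), b)

so that f is evaluated once instead of once per scalarized reference
and size inquiry that mentions the bound.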

The patch to simplify.c is not strictly necessary, but it should open
up some optimization possibilities and, frankly, makes the
-fdump-fortran-optimized output at least halfway readable.
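
The case in question is the lower bound of an assumed-shape dummy
argument, which is known at compile time unless it is declared
explicitly with a non-constant expression, e.g.

   subroutine foo (a, b, c)
     real, dimension(:,:) :: a, b, c
     ! lbound(a,1) etc. now simplify to 1, so the scalarized references
     ! can use the constant instead of a call to the __internal_lbound
     ! helper.
     c = matmul(a, b)
   end subroutine foo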

This patch needs to be extended in different directions:

- Currently, it only handles the case of two-dimensional arguments.

- Special cases like transpose(a) and spread(a) in the arguments
  should be added, possibly with a reordering of loops and the
  addition of some more variables (a rough sketch of the transpose
  case follows this list).

- Temporary handling for arguments and results which does not depend
  on an allocatable RHS.  I am thinking of creating nested blocks,
  where the inner block has arrays with the bounds of the
  references.

- Cases like matmul(a+1,b) can also be handled in the loop,
  without a temporary.

- This needs command-line options, -finline-matmul and
  -finline-matmul-limit=, similar to the existing handling of external
  BLAS (-fexternal-blas and -fblas-matmul-limit=).

- Bounds checking could/should also be handled.  Currently, this
  causes the only regression because of a different error message.
  We need to call gfortran_runtime_error directly from the front end,
  and to be able to tell the bounds-checking code "do not worry about
  bounds checking this, it has already been taken care of".

- I already have some test cases, but I would really like them to
  cover all code paths.  This still needs some more work.
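
Regarding the transpose case mentioned above, a rough sketch of one
possible result (hand-written, assuming unit lower bounds and strides;
the current patch does not generate this):

   ! c = matmul(transpose(a), b)
   c = 0
   do j = 0, size(b,2)-1
     do k = 0, size(a,1)-1
       do i = 0, size(a,2)-1
         c(i+1, j+1) = c(i+1, j+1) + a(k+1, i+1) * b(k+1, j+1)
       end do
     end do
   end do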

So, what do you think about this?

	Thomas

2015-04-05  Thomas Koenig  <tkoenig@gcc.gnu.org>

        * frontend-passes.c (create_var):  Add optional argument
        vname as part of the name.  Split off block creation into
        (insert_block): New function.
        (cfe_expr_0):  Use "fcn" as part of temporary name.
        (optimize_namespace):  Call optimize_matmul_assign.
        (combine_array_constructor):  Use "constr" as part of
        temporary name.
        (get_array_inq_function):  New function.
        (is_function_or_op):  New function.
        (has_function_or_op):  New function.
        (freeze_expr):  New function.
        (freeze_references):  New function.
        (convert_to_index_kind):  New function.
        (create_do_loop):  New function.
        (get_operand):  New function.
        (get_size_m1):  New function.
        (scalarized_expr):  New function.
        (optimize_matmul_assign): New function.
        * simplify.c (simplify_bound):  Simplify the case of the
        lower bound of an assumed-shape argument.
Index: frontend-passes.c
===================================================================
--- frontend-passes.c	(Revision 221757)
+++ frontend-passes.c	(Arbeitskopie)
@@ -43,7 +43,11 @@ static void doloop_warn (gfc_namespace *);
 static void optimize_reduction (gfc_namespace *);
 static int callback_reduction (gfc_expr **, int *, void *);
 static void realloc_strings (gfc_namespace *);
-static gfc_expr *create_var (gfc_expr *);
+static gfc_expr *create_var (gfc_expr *, const char *vname=NULL);
+static int optimize_matmul_assign (gfc_code **, int *, void *);
+static gfc_code * create_do_loop (gfc_expr *, gfc_expr *, gfc_expr *,
+				  locus *, gfc_namespace *, 
+				  char *vname=NULL);
 
 /* How deep we are inside an argument list.  */
 
@@ -95,6 +99,10 @@ static bool in_assoc_list;
 
 /* Entry point - run all passes for a namespace.  */
 
+/* Counter for temporary variables.  */
+
+static int var_num = 1;
+
 void
 gfc_run_passes (gfc_namespace *ns)
 {
@@ -157,7 +165,7 @@ realloc_string_callback (gfc_code **c, int *walk_s
     return 0;
   
   current_code = c;
-  n = create_var (expr2);
+  n = create_var (expr2, "trim");
   co->expr2 = n;
   return 0;
 }
@@ -524,29 +532,11 @@ constant_string_length (gfc_expr *e)
 
 }
 
-/* Returns a new expression (a variable) to be used in place of the old one,
-   with an assignment statement before the current statement to set
-   the value of the variable. Creates a new BLOCK for the statement if
-   that hasn't already been done and puts the statement, plus the
-   newly created variables, in that block.  Special cases:  If the
-   expression is constant or a temporary which has already
-   been created, just copy it.  */
-
-static gfc_expr*
-create_var (gfc_expr * e)
+static gfc_namespace*
+insert_block ()
 {
-  char name[GFC_MAX_SYMBOL_LEN +1];
-  static int num = 1;
-  gfc_symtree *symtree;
-  gfc_symbol *symbol;
-  gfc_expr *result;
-  gfc_code *n;
   gfc_namespace *ns;
-  int i;
 
-  if (e->expr_type == EXPR_CONSTANT || is_fe_temp (e))
-    return gfc_copy_expr (e);
-
   /* If the block hasn't already been created, do so.  */
   if (inserted_block == NULL)
     {
@@ -578,7 +568,37 @@ constant_string_length (gfc_expr *e)
   else
     ns = inserted_block->ext.block.ns;
 
-  sprintf(name, "__var_%d",num++);
+  return ns;
+}
+
+/* Returns a new expression (a variable) to be used in place of the old one,
+   with an optional assignment statement before the current statement to set
+   the value of the variable. Creates a new BLOCK for the statement if that
+   hasn't already been done and puts the statement, plus the newly created
+   variables, in that block.  Special cases: If the expression is constant or
+   a temporary which has already been created, just copy it.  */
+
+static gfc_expr*
+create_var (gfc_expr * e, const char *vname)
+{
+  char name[GFC_MAX_SYMBOL_LEN +1];
+  gfc_symtree *symtree;
+  gfc_symbol *symbol;
+  gfc_expr *result;
+  gfc_code *n;
+  gfc_namespace *ns;
+  int i;
+
+  if (e->expr_type == EXPR_CONSTANT || is_fe_temp (e))
+    return gfc_copy_expr (e);
+
+  ns = insert_block ();
+
+  if (vname)
+    snprintf (name, GFC_MAX_SYMBOL_LEN, "__var_%d_%s", var_num++, vname);
+  else
+    sprintf (name, "__var_%d", var_num++);
+
   if (gfc_get_sym_tree (name, ns, &symtree, false) != 0)
     gcc_unreachable ();
 
@@ -658,6 +678,7 @@ constant_string_length (gfc_expr *e)
 		     "Creating array temporary at %L", &(e->where));
     }
 
+
   /* Generate the new assignment.  */
   n = XCNEW (gfc_code);
   n->op = EXEC_ASSIGN;
@@ -724,7 +745,7 @@ cfe_expr_0 (gfc_expr **e, int *walk_subtrees,
 	  if (gfc_dep_compare_functions (*ei, *ej, true) == 0)
 	    {
 	      if (newvar == NULL)
-		newvar = create_var (*ei);
+		newvar = create_var (*ei, "fcn");
 
 	      if (warn_function_elimination)
 		do_warn_function_elimination (*ej);
@@ -931,6 +952,7 @@ convert_elseif (gfc_code **c, int *walk_subtrees A
   /*  Don't walk subtrees.  */
   return 0;
 }
+
 /* Optimize a namespace, including all contained namespaces.  */
 
 static void
@@ -947,6 +969,7 @@ optimize_namespace (gfc_namespace *ns)
   gfc_code_walker (&ns->code, convert_elseif, dummy_expr_callback, NULL);
   gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL);
   gfc_code_walker (&ns->code, optimize_code, optimize_expr, NULL);
+  gfc_code_walker (&ns->code, optimize_matmul_assign, dummy_expr_callback, NULL);
 
   /* BLOCKs are handled in the expression walker below.  */
   for (ns = ns->contained; ns; ns = ns->sibling)
@@ -1222,7 +1245,7 @@ combine_array_constructor (gfc_expr *e)
   if (op2->ts.type == BT_CHARACTER)
     return false;
 
-  scalar = create_var (gfc_copy_expr (op2));
+  scalar = create_var (gfc_copy_expr (op2), "constr");
 
   oldbase = op1->value.constructor;
   newbase = NULL;
@@ -1939,7 +1962,577 @@ doloop_warn (gfc_namespace *ns)
   gfc_code_walker (&ns->code, doloop_code, do_function, NULL);
 }
 
+/* Auxiliary function to build and simplify an array inquiry function.
+   dim is the (one-based) DIM argument of the intrinsic.  */
 
+static gfc_expr *
+get_array_inq_function (gfc_expr *e, int dim, gfc_isym_id id)
+{
+
+  gfc_expr *fcn;
+  gfc_actual_arglist *actual_arglist, *n1, *n2;
+  gfc_expr *dim_arg, *kind;
+  const char *name;
+
+  fcn = gfc_get_expr ();
+  fcn->expr_type = EXPR_FUNCTION;
+  fcn->value.function.isym = gfc_intrinsic_function_by_id (id);
+  actual_arglist = gfc_get_actual_arglist ();
+  actual_arglist->expr = gfc_copy_expr (e);
+
+  n1 = gfc_get_actual_arglist ();
+  dim_arg = gfc_get_int_expr (gfc_default_integer_kind, &e->where, dim);
+  n1->expr = dim_arg;
+  actual_arglist->next = n1;
+
+  n2 = gfc_get_actual_arglist ();
+  kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where,
+			   gfc_index_integer_kind);
+  n2->expr = kind;
+  n1->next = n2;
+
+  fcn->value.function.actual = actual_arglist;
+  fcn->where = e->where;
+  fcn->ts.type = BT_INTEGER;
+  fcn->ts.kind = gfc_index_integer_kind;
+
+  switch (id)
+    {
+    case GFC_ISYM_LBOUND:
+      name = "__internal_lbound";
+      break;
+
+    case GFC_ISYM_UBOUND:
+      name = "__internal_ubound";
+      break;
+
+    case GFC_ISYM_SIZE:
+      name = "__internal_size";
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  gfc_get_sym_tree (name, current_ns, &fcn->symtree, false);
+  fcn->symtree->n.sym->ts = fcn->ts;
+  fcn->symtree->n.sym->attr.flavor = FL_PROCEDURE;
+  fcn->symtree->n.sym->attr.function = 1;
+  fcn->symtree->n.sym->attr.elemental = 1;
+  fcn->symtree->n.sym->attr.referenced = 1;
+  fcn->symtree->n.sym->attr.access = ACCESS_PRIVATE;
+  gfc_commit_symbol (fcn->symtree->n.sym);
+  gfc_simplify_expr (fcn, 0);
+  return fcn;
+}
+
+/* Callback function for has_function_or_op.  */
+
+static int
+is_function_or_op (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED,
+	     void *data ATTRIBUTE_UNUSED)
+{
+  if ((*e) == 0)
+    return 0;
+  else
+    return (*e)->expr_type == EXPR_FUNCTION
+      || (*e)->expr_type == EXPR_OP;
+}
+
+/* Returns true if the expression contains a function or an operator.  */
+
+static bool
+has_function_or_op (gfc_expr **e)
+{
+  if (e == NULL)
+    return false;
+  else
+    return gfc_expr_walker (e, is_function_or_op, NULL);
+}
+
+/* Freeze a single expression.  */
+
+static void
+freeze_expr (gfc_expr **ep)
+{
+  gfc_expr *ne;
+  if (has_function_or_op (ep))
+    {
+      ne = create_var (*ep, "freeze");
+      *ep = ne;
+    }
+}
+
+/* Go through an expression's references and assign them to temporary
+   variables if they contain functions.  This is usually done prior to
+   front-end scalarization to avoid multiple invocations of functions.  */
+
+static void
+freeze_references (gfc_expr *e)
+{
+  gfc_ref *r;
+  gfc_array_ref *ar;
+  int i;
+
+  for (r=e->ref; r; r=r->next)
+    {
+      if (r->type == REF_SUBSTRING)
+	{
+	  if (r->u.ss.start != NULL)
+	    freeze_expr (&r->u.ss.start);
+
+	  if (r->u.ss.end != NULL)
+	    freeze_expr (&r->u.ss.end);
+	}
+      else if (r->type == REF_ARRAY)
+	{
+	  ar = &r->u.ar;
+	  switch (ar->type)
+	    {
+	    case AR_FULL:
+	      break;
+
+	    case AR_SECTION:
+	      for (i=0; i<ar->dimen; i++)
+		{
+		  if (ar->dimen_type[i] == DIMEN_RANGE)
+		    {
+		      freeze_expr (&ar->start[i]);
+		      freeze_expr (&ar->end[i]);
+		      freeze_expr (&ar->stride[i]);
+		    }
+		}
+	    default:
+	      break;
+	    }
+	}
+    }
+}
+
+/* Convert to gfc_index_integer_kind if needed, just do a copy otherwise.  */
+
+static gfc_expr *
+convert_to_index_kind (gfc_expr *e)
+{
+  gfc_expr *res;
+
+  gcc_assert (e != NULL);
+
+  res = gfc_copy_expr (e);
+
+  gcc_assert (e->ts.type == BT_INTEGER);
+
+  if (res->ts.kind != gfc_index_integer_kind)
+    {
+      gfc_typespec ts;
+      gfc_clear_ts (&ts);
+      ts.type = BT_INTEGER;
+      ts.kind = gfc_index_integer_kind;
+
+      gfc_convert_type_warn (e, &ts, 2, 0);
+    }
+
+  return res;
+}
+
+/* Function to create a DO loop, including creation of the
+   iteration variable.  The gfc_expr arguments are copied.  */
+
+static gfc_code *
+create_do_loop (gfc_expr *start, gfc_expr *end, gfc_expr *step, locus *where,
+		gfc_namespace *ns, char *vname)
+{
+
+  char name[GFC_MAX_SYMBOL_LEN +1];
+  gfc_symtree *symtree;
+  gfc_symbol *symbol;
+  gfc_expr *i;
+  gfc_code *n, *n2;
+
+  /* Create an expression for the iteration variable.  */
+  if (vname)
+    sprintf (name, "__var_%d_do_%s", var_num++, vname);
+  else
+    sprintf (name, "__var_%d_do", var_num++);
+
+
+  if (gfc_get_sym_tree (name, ns, &symtree, false) != 0)
+    gcc_unreachable ();
+
+  symbol = symtree->n.sym;
+  symbol->ts.type = BT_INTEGER;
+  symbol->ts.kind = gfc_index_integer_kind;
+  symbol->attr.flavor = FL_VARIABLE;
+  symbol->attr.referenced = 1;
+  symbol->attr.dimension = 0;
+  symbol->attr.fe_temp = 1;
+  gfc_commit_symbol (symbol);
+
+  i = gfc_get_expr ();
+  i->expr_type = EXPR_VARIABLE;
+  i->ts = symbol->ts;
+  i->rank = 0;
+  i->where = *where;
+  i->symtree = symtree;
+
+  n = XCNEW (gfc_code);
+  n->op = EXEC_DO;
+  n->loc = *where;
+  n->ext.iterator = gfc_get_iterator ();
+  n->ext.iterator->var = i;
+  n->ext.iterator->start = convert_to_index_kind (start);
+  n->ext.iterator->end = convert_to_index_kind (end);
+  if (step)
+    n->ext.iterator->step = convert_to_index_kind (step);
+  else
+    n->ext.iterator->step = gfc_get_int_expr (gfc_index_integer_kind,
+					      where, 1);
+
+  n2 = XCNEW (gfc_code);
+  n2->op = EXEC_DO;
+  n2->loc = *where;
+  n2->next = NULL;
+  n->block = n2;
+  return n;
+}
+
+/* Return an operation of two gfc_expr (or of one, if e2 is NULL).  This
+   assumes compatible typespecs.  */
+
+static gfc_expr *
+get_operand (gfc_intrinsic_op op, gfc_expr *e1, gfc_expr *e2)
+{
+  gfc_expr *res;
+
+  res = gfc_get_expr ();
+  res->ts = e1->ts;
+  res->where = e1->where;
+  res->expr_type = EXPR_OP;
+  res->value.op.op = op;
+  res->value.op.op1 = e1;
+  res->value.op.op2 = e2;
+  gfc_simplify_expr (res, 0);
+  return res;
+}
+
+/* Get the upper bound of the DO loops for matmul along
+   a dimension, i.e. size minus one.  */
+static gfc_expr*
+get_size_m1 (gfc_expr *e, int dimen)
+{
+  mpz_t size;
+  gfc_expr *res;
+
+  if (gfc_array_dimen_size (e, dimen, &size))
+    {
+      res = gfc_get_constant_expr (BT_INTEGER,
+				   gfc_index_integer_kind, &e->where);
+      mpz_sub_ui (res->value.integer, size, 1);
+      mpz_clear (size);
+    }
+  else
+    {
+      res = get_operand (INTRINSIC_MINUS,
+			 get_array_inq_function (e, dimen + 1, GFC_ISYM_SIZE),
+			 gfc_get_int_expr (gfc_index_integer_kind,
+					   &e->where, 1));
+      gfc_simplify_expr (res, 0);
+    }
+
+  return res;
+}
+
+/* Function to return a scalarized expression.  It is assumed that indices are
+   zero-based to make generation of DO loops easier.  A zero as index will
+   access the first element along a dimension.  Single-element references will
+   be skipped.  A NULL as an expression will be replaced by a full reference.
+   This assumes that the index loops have gfc_index_integer_kind, and that all
+   references have been frozen.  */
+
+static gfc_expr*
+scalarized_expr (gfc_expr *e_in, gfc_expr **index, int count_index)
+{
+  gfc_ref *ref;
+  gfc_array_ref *ar;
+  int i;
+  int rank;
+  gfc_expr *e;
+  int i_index;
+  bool was_fullref;
+
+  e = gfc_copy_expr(e_in);
+
+  rank = e->rank;
+
+  ref = e->ref;
+
+  while (ref)
+    {
+      if (ref->type == REF_ARRAY
+	  && (ref->u.ar.type == AR_FULL || ref->u.ar.type == AR_SECTION))
+	break;
+      ref = ref->next;
+    }
+  ar = &ref->u.ar;
+
+  /* We scalarize count_index variables, reducing the rank by count_index.  */
+
+  e->rank = rank - count_index;
+
+  was_fullref = ar->type == AR_FULL;
+
+  if (e->rank == 0)
+    ar->type = AR_ELEMENT;
+  else
+    ar->type = AR_SECTION;
+
+  ar->dimen = rank;
+
+  /* Loop over the indices.  For each index, create the expression
+     index * stride + lbound (or the start of the section).  */
+  
+  i_index = 0;
+  for (i=0; i<rank; i++)
+    {
+      if (was_fullref || ar->dimen_type[i] == DIMEN_RANGE)
+	{
+	  if (index[i_index] != NULL)
+	    {
+	      gfc_expr *lbound, *nindex;
+	      gfc_expr *loopvar;
+	      
+	      loopvar = gfc_copy_expr (index[i_index]); 
+	      
+	      if (ar->stride[i])
+		{
+		  gfc_expr *tmp;
+
+		  tmp = gfc_copy_expr(ar->stride[i]);
+		  if (tmp->ts.kind != gfc_index_integer_kind)
+		    {
+		      gfc_typespec ts;
+		      gfc_clear_ts (&ts);
+		      ts.type = BT_INTEGER;
+		      ts.kind = gfc_index_integer_kind;
+		      gfc_convert_type (tmp, &ts, 2);
+		    }
+		  nindex = get_operand (INTRINSIC_TIMES, loopvar, tmp);
+		}
+	      else
+		nindex = loopvar;
+	      
+	      if (ar->start[i])
+		{
+		  lbound = gfc_copy_expr (ar->start[i]);
+		  if (lbound->ts.kind != gfc_index_integer_kind)
+		    {
+		      gfc_typespec ts;
+		      gfc_clear_ts (&ts);
+		      ts.type = BT_INTEGER;
+		      ts.kind = gfc_index_integer_kind;
+		      gfc_convert_type (lbound, &ts, 2);
+
+		    }
+		}
+	      else
+		lbound = get_array_inq_function (e_in, i+1, GFC_ISYM_LBOUND);
+
+	      ar->dimen_type[i] = DIMEN_ELEMENT;
+	      ar->start[i] = get_operand (INTRINSIC_PLUS, nindex, lbound);
+	      ar->end[i] = NULL;
+	      ar->stride[i] = NULL;
+	      gfc_simplify_expr (ar->start[i], 0);
+	    }
+	  else if (was_fullref)
+	    {
+	      ar->dimen_type[i] = DIMEN_RANGE;
+	      ar->start[i] = NULL;
+	      ar->end[i] = NULL;
+	      ar->stride[i] = NULL;
+	    }
+	  i_index ++;
+	}
+    }
+  return e;
+}
+
+
+/* Optimize assignments of the form c = matmul(a,b).
+   Handle only the case where a and b are rank-two arrays for now.
+
+   This translates the code to
+
+   BLOCK
+     integer i,j,k
+     c = 0
+     do j=0, size(b,2)-1
+       do k=0, size(a, 2)-1
+         do i=0, size(a, 1)-1
+            c(i * stride(c,1) + lbound(c,1), j * stride(c,2) + lbound(c,2)) =
+	    c(i * stride(c,1) + lbound(c,1), j * stride(c,2) + lbound(c,2)) +
+            a(i * stride(a,1) + lbound(a,1), k * stride(a,2) + lbound(a,2)) *
+            b(k * stride(b,1) + lbound(b,1), j * stride(b,2) + lbound(b,2))
+         end do
+       end do
+     end do
+   END BLOCK
+   
+*/
+
+static int
+optimize_matmul_assign (gfc_code **c,
+			  int *walk_subtrees ATTRIBUTE_UNUSED,
+			  void *data ATTRIBUTE_UNUSED)
+{
+  gfc_code *co = *c;
+  gfc_expr *expr1, *expr2;
+  gfc_expr *matrix_a, *matrix_b;
+  gfc_actual_arglist *a, *b;
+  gfc_code *do_1, *do_2, *do_3, *assign_zero, *assign_matmul;
+  gfc_expr *zero_e;
+  gfc_expr *u1, *u2, *u3;
+  gfc_expr *list[2];
+  gfc_expr *ascalar, *bscalar, *cscalar;
+  gfc_expr *mult;
+  gfc_expr *var_1, *var_2, *var_3;
+  gfc_namespace *ns;
+  gfc_intrinsic_op op_times, op_plus;
+
+  if (co->op != EXEC_ASSIGN)
+    return 0;
+
+  expr1 = co->expr1;
+  expr2 = co->expr2;
+  if (expr2->expr_type != EXPR_FUNCTION
+      || expr2->value.function.isym == NULL
+      || expr2->value.function.isym->id != GFC_ISYM_MATMUL)
+    return 0;
+
+  /* Handling only two matrices of dimension two for the time being.  */
+
+  current_code = c;
+  inserted_block = NULL;
+  changed_statement = NULL;
+
+  a = expr2->value.function.actual;
+  matrix_a = a->expr;
+  b = a->next;
+  matrix_b = b->expr;
+
+  if (matrix_a->expr_type != EXPR_VARIABLE
+      || matrix_b->expr_type != EXPR_VARIABLE)
+    return 0;
+
+  if (matrix_a->rank != 2 || matrix_b->rank != 2)
+    return 0;
+
+  if (gfc_check_dependency (expr1, matrix_a, true)
+      || gfc_check_dependency (expr1, matrix_b, true))
+    return 0;
+
+  ns = insert_block ();
+
+  switch(expr1->ts.type)
+    {
+    case BT_INTEGER:
+      zero_e = gfc_get_int_expr (expr1->ts.kind, &expr1->where, 0);
+      op_times = INTRINSIC_TIMES;
+      op_plus = INTRINSIC_PLUS;
+      break;
+
+    case BT_LOGICAL:
+      op_times = INTRINSIC_AND;
+      op_plus = INTRINSIC_OR;
+      zero_e = gfc_get_logical_expr (expr1->ts.kind, &expr1->where,
+				     0);
+      break;
+    case BT_REAL:
+      zero_e = gfc_get_constant_expr (BT_REAL, expr1->ts.kind,
+				      &expr1->where);
+      mpfr_set_si (zero_e->value.real, 0, GFC_RND_MODE);
+      op_times = INTRINSIC_TIMES;
+      op_plus = INTRINSIC_PLUS;
+      break;
+
+    case BT_COMPLEX:
+      zero_e = gfc_get_constant_expr (BT_COMPLEX, expr1->ts.kind,
+				      &expr1->where);
+      mpc_set_si_si (zero_e->value.complex, 0, 0, GFC_RND_MODE);
+      op_times = INTRINSIC_TIMES;
+      op_plus = INTRINSIC_PLUS;
+
+      break;
+
+    default:
+      gcc_unreachable();
+    }
+
+  assign_zero = XCNEW (gfc_code);
+  assign_zero->op = EXEC_ASSIGN;
+  assign_zero->loc = co->loc;
+  assign_zero->expr1 = gfc_copy_expr (expr1);
+  assign_zero->expr2 = zero_e;
+
+  ns->code = assign_zero;
+  current_code = &ns->code;
+
+  freeze_references (matrix_a);
+  freeze_references (matrix_b);
+
+  u1 = get_size_m1 (matrix_b, 1);
+  u2 = get_size_m1 (matrix_a, 1);
+  u3 = get_size_m1 (matrix_a, 0);
+
+  do_1 = create_do_loop (gfc_get_int_expr (gfc_index_integer_kind,
+  					    &co->loc, 0),
+			 u1, NULL, &co->loc, ns);
+
+  do_2 = create_do_loop (gfc_get_int_expr (gfc_index_integer_kind,
+					   &co->loc, 0),
+			 u2, NULL, &co->loc, ns);
+
+  do_1->block->next = do_2;
+
+  do_3 = create_do_loop (gfc_get_int_expr (gfc_index_integer_kind,
+                                            &co->loc, 0),
+			 u3, NULL, &co->loc, ns);
+
+  do_2->block->next = do_3;
+
+  var_1 = do_1->ext.iterator->var;
+  var_2 = do_2->ext.iterator->var;
+  var_3 = do_3->ext.iterator->var;
+
+  assign_zero->next = do_1;
+
+  assign_matmul = XCNEW (gfc_code);
+  assign_matmul->op = EXEC_ASSIGN;
+  assign_matmul->loc = co->loc;
+
+  list[0] = var_3;
+  list[1] = var_1;
+  cscalar = scalarized_expr (gfc_copy_expr (co->expr1), list, 2);
+
+  list[0] = var_3;
+  list[1] = var_2;
+  ascalar = scalarized_expr (gfc_copy_expr (matrix_a), list, 2);
+
+  list[0] = var_2;
+  list[1] = var_1;
+  bscalar = scalarized_expr (gfc_copy_expr (matrix_b), list, 2);
+
+  assign_matmul->expr1 = gfc_copy_expr (cscalar);
+
+  mult = get_operand (op_times, ascalar, bscalar);
+  assign_matmul->expr2 = get_operand (op_plus, cscalar, mult);
+
+  do_3->block->next = assign_matmul;
+
+  co->next = NULL;
+  gfc_free_statements(co);
+  return 0;
+}
+
 #define WALK_SUBEXPR(NODE) \
   do							\
     {							\
Index: simplify.c
===================================================================
--- simplify.c	(Revision 221757)
+++ simplify.c	(Arbeitskopie)
@@ -3445,6 +3445,25 @@ simplify_bound (gfc_expr *array, gfc_expr *dim, gf
 
  done:
 
+  if (!upper && as && as->type == AS_ASSUMED_SHAPE && dim)
+    {
+      if (dim->expr_type == EXPR_CONSTANT)
+	{
+	  unsigned long int ndim;
+	  gfc_expr *lower, *res;
+
+	  ndim = mpz_get_si (dim->value.integer) - 1;
+	  lower = as->lower[ndim];
+	  if (lower->expr_type == EXPR_CONSTANT)
+	    {
+	      int nkind = mpz_get_si (kind->value.integer);
+	      res = gfc_copy_expr (lower);
+	      res->ts.kind = nkind;
+	      return res;
+	    }
+	}
+    }
+
   if (as && (as->type == AS_DEFERRED || as->type == AS_ASSUMED_SHAPE
 	     || as->type == AS_ASSUMED_RANK))
     return NULL;
