This is the mail archive of the fortran@gcc.gnu.org mailing list for the GNU Fortran project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: RFC: Patch to implement the fortran matmul/transpose optimisation


On Friday 09 September 2005 22:13, Steven Bosscher wrote:
> On Thursday 25 August 2005 12:20, Richard Sandiford wrote:
> > This patch implements the matmul (transpose (...), ...) optimisation
> > mentioned on the wiki.  It's just a preview really; it's obviously
> > not suitable for stage 3.  Feedback welcome!
>
> So with all the other big stuff going in, is this really not suitable
> for stage 3?  I'd like it a lot if we'd have this anyway.

This is an updated version of the patch that I'm using in my tree.
Perhaps other people want to try it out too, so...


Index: gcc/fortran/Make-lang.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/fortran/Make-lang.in,v
retrieving revision 1.22
diff -u -3 -p -r1.22 Make-lang.in
--- gcc/fortran/Make-lang.in	13 Sep 2005 06:24:18 -0000	1.22
+++ gcc/fortran/Make-lang.in	13 Sep 2005 19:02:14 -0000
@@ -297,4 +297,4 @@ fortran/trans-intrinsic.o: $(GFORTRAN_TR
   gt-fortran-trans-intrinsic.h
 fortran/dependency.o: $(GFORTRAN_TRANS_DEPS) fortran/dependency.h
 fortran/trans-common.o: $(GFORTRAN_TRANS_DEPS)
-
+fortran/resolve.o: fortran/dependency.h
Index: gcc/fortran/dependency.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/fortran/dependency.c,v
retrieving revision 1.9
diff -u -3 -p -r1.9 dependency.c
--- gcc/fortran/dependency.c	9 Sep 2005 06:34:06 -0000	1.9
+++ gcc/fortran/dependency.c	13 Sep 2005 19:02:14 -0000
@@ -213,24 +213,125 @@ gfc_ref_needs_temporary_p (gfc_ref *ref)
   return false;
 }
 
+/* Some array-returning intrinsics can be implemented by reusing the
+   data from one of the array arguments.  For example, TRANPOSE does
+   not necessarily need to allocate new data: it can be implemented
+   by copying the original array's descriptor and simply swapping the
+   two dimension specifications.
 
-/* Dependency checking for direct function return by reference.
-   Returns true if the arguments of the function depend on the
-   destination.  This is considerably less conservative than other
-   dependencies because many function arguments will already be
-   copied into a temporary.  */
+   If EXPR is a call to such an intrinsic, return the argument
+   whose data can be reused, otherwise return NULL.  */
+
+gfc_expr *
+gfc_get_noncopying_intrinsic_argument (gfc_expr * expr)
+{
+  if (expr->expr_type != EXPR_FUNCTION || !expr->value.function.isym)
+    return 0;
+
+  switch (expr->value.function.isym->generic_id)
+    {
+      case GFC_ISYM_TRANSPOSE:
+	return expr->value.function.actual->expr;
+
+      default:
+	return 0;
+    }
+}
+
+
+/* Return true if array variable VAR could be passed to the same function
+   as argument EXPR without interfering with EXPR.  INTENT is the intent
+   of VAR.
+
+   This is considerably less conservative than other dependencies
+   because many function arguments will already be copied into a
+   temporary.  */
+
+static int
+gfc_check_argument_var_dependency (gfc_expr * var, sym_intent intent,
+				   gfc_expr * expr)
+{
+  gfc_ref *ref;
+  int n;
+
+  gcc_assert (var->expr_type == EXPR_VARIABLE);
+  gcc_assert (var->rank > 0);
+
+  switch (expr->expr_type)
+    {
+      case EXPR_VARIABLE:
+	if (expr->rank > 1)
+	  {
+	    /* This is an array section.  */
+	    for (ref = expr->ref; ref; ref = ref->next)
+	      if (ref->type == REF_ARRAY && ref->u.ar.type != AR_ELEMENT)
+		break;
+	    gcc_assert (ref);
+
+	    /* Vector subscript array sections will be copied to a
+	       temporary.  AR_FULL can't contain vector subscripts.  */
+	    if (ref->u.ar.type == AR_SECTION)
+	      for (n = 0; n < ref->u.ar.dimen; n++)
+		if (ref->u.ar.dimen_type[n] == DIMEN_VECTOR)
+		  return false;
+	  }
+	return gfc_check_dependency (var, expr, NULL, 0);
+
+      case EXPR_ARRAY:
+	return gfc_check_dependency (var, expr, NULL, 0);
+
+      case EXPR_FUNCTION:
+	if (intent != INTENT_IN && expr->inline_noncopying_intrinsic)
+	  {
+	    expr = gfc_get_noncopying_intrinsic_argument (expr);
+	    return gfc_check_argument_var_dependency (var, intent, expr);
+	  }
+	return 0;
+
+      default:
+	return 0;
+    }
+}
+
+
+/* Like gfc_check_argument_var_dependency, but extended to any
+   array expression OTHER, not just variables.  */
+
+static int
+gfc_check_argument_dependency (gfc_expr * other, sym_intent intent,
+			       gfc_expr * expr)
+{
+  switch (other->expr_type)
+    {
+      case EXPR_VARIABLE:
+	return gfc_check_argument_var_dependency (other, intent, expr);
+
+      case EXPR_FUNCTION:
+	if (other->inline_noncopying_intrinsic)
+	  {
+	    other = gfc_get_noncopying_intrinsic_argument (other);
+	    return gfc_check_argument_dependency (other, INTENT_IN, expr);
+	  }
+	return 0;
+
+      default:
+	return 0;
+    }
+}
+
+
+/* Like gfc_check_argument_dependency, but check all the arguments in ACTUAL.
+   FNSYM is the function being called, or NULL if not known.  */
 
 int
-gfc_check_fncall_dependency (gfc_expr * dest, gfc_expr * fncall)
+gfc_check_fncall_dependency (gfc_expr * other, sym_intent intent,
+			     gfc_symbol * fnsym, gfc_actual_arglist * actual)
 {
-  gfc_actual_arglist *actual;
+  gfc_formal_arglist *formal;
   gfc_expr *expr;
 
-  gcc_assert (dest->expr_type == EXPR_VARIABLE
-	  && fncall->expr_type == EXPR_FUNCTION);
-  gcc_assert (fncall->rank > 0);
-
-  for (actual = fncall->value.function.actual; actual; actual = actual->next)
+  formal = fnsym ? fnsym->formal : NULL;
+  for (; actual; actual = actual->next, formal = formal ? formal->next : NULL)
     {
       expr = actual->expr;
 
@@ -238,23 +339,14 @@ gfc_check_fncall_dependency (gfc_expr * 
       if (!expr)
 	continue;
 
-      /* Non-variable expressions will be allocated temporaries anyway.  */
-      switch (expr->expr_type)
-	{
-	case EXPR_VARIABLE:
-	  if (!gfc_ref_needs_temporary_p (expr->ref)
-	      && gfc_check_dependency (dest, expr, NULL, 0))
-	    return 1;
-	  break;
-
-	case EXPR_ARRAY:
-	  if (gfc_check_dependency (dest, expr, NULL, 0))
-	    return 1;
-	  break;
+      /* Skip intent(in) arguments if OTHER itself is intent(in).  */
+      if (formal
+	  && intent == INTENT_IN
+	  && formal->sym->attr.intent == INTENT_IN)
+	continue;
 
-	default:
-	  break;
-	}
+      if (gfc_check_argument_dependency (other, intent, expr))
+	return 1;
     }
 
   return 0;
Index: gcc/fortran/dependency.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/fortran/dependency.h,v
retrieving revision 1.5
diff -u -3 -p -r1.5 dependency.h
--- gcc/fortran/dependency.h	9 Sep 2005 06:34:06 -0000	1.5
+++ gcc/fortran/dependency.h	13 Sep 2005 19:02:14 -0000
@@ -22,7 +22,9 @@ Software Foundation, 51 Franklin Street,
 
 
 bool gfc_ref_needs_temporary_p (gfc_ref *);
-int gfc_check_fncall_dependency (gfc_expr *, gfc_expr *);
+gfc_expr *gfc_get_noncopying_intrinsic_argument (gfc_expr *);
+int gfc_check_fncall_dependency (gfc_expr *, sym_intent, gfc_symbol *,
+				 gfc_actual_arglist *);
 int gfc_check_dependency (gfc_expr *, gfc_expr *, gfc_expr **, int);
 int gfc_is_same_range (gfc_array_ref *, gfc_array_ref *, int, int);
 int gfc_dep_compare_expr (gfc_expr *, gfc_expr *);
Index: gcc/fortran/gfortran.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/fortran/gfortran.h,v
retrieving revision 1.86
diff -u -3 -p -r1.86 gfortran.h
--- gcc/fortran/gfortran.h	9 Sep 2005 18:21:38 -0000	1.86
+++ gcc/fortran/gfortran.h	13 Sep 2005 19:02:15 -0000
@@ -1090,6 +1090,10 @@ typedef struct gfc_expr
 
   /* True if it is converted from Hollerith constant.  */
   unsigned int from_H : 1;
+  /* True if the expression is a call to a function that returns an array,
+     and if we have decided not to allocate temporary data for that array.  */
+  unsigned int inline_noncopying_intrinsic : 1;
+
 
   union
   {
Index: gcc/fortran/resolve.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/fortran/resolve.c,v
retrieving revision 1.52
diff -u -3 -p -r1.52 resolve.c
--- gcc/fortran/resolve.c	31 Aug 2005 12:31:30 -0000	1.52
+++ gcc/fortran/resolve.c	13 Sep 2005 19:02:16 -0000
@@ -24,6 +24,7 @@ Software Foundation, 51 Franklin Street,
 #include "system.h"
 #include "gfortran.h"
 #include "arith.h"  /* For gfc_compare_expr().  */
+#include "dependency.h"
 
 
 /* Stack to push the current if we descend into a block during
@@ -749,6 +750,24 @@ resolve_actual_arglist (gfc_actual_argli
 }
 
 
+/* Go through each actual argument in ACTUAL and see if it can be
+   implemented as an inlined, non-copying intrinsic.  FNSYM is the
+   function being called, or NULL if not known.  */
+
+static void
+find_noncopying_intrinsics (gfc_symbol * fnsym, gfc_actual_arglist * actual)
+{
+  gfc_actual_arglist *ap;
+  gfc_expr *expr;
+
+  for (ap = actual; ap; ap = ap->next)
+    if (ap->expr
+	&& (expr = gfc_get_noncopying_intrinsic_argument (ap->expr))
+	&& !gfc_check_fncall_dependency (expr, INTENT_IN, fnsym, actual))
+      ap->expr->inline_noncopying_intrinsic = 1;
+}
+
+
 /************* Function resolution *************/
 
 /* Resolve a function call known to be generic.
@@ -1095,6 +1114,9 @@ resolve_function (gfc_expr * expr)
 	}
     }
 
+  if (t == SUCCESS)
+    find_noncopying_intrinsics (expr->value.function.esym,
+				expr->value.function.actual);
   return t;
 }
 
@@ -1317,27 +1339,28 @@ resolve_call (gfc_code * c)
   if (resolve_actual_arglist (c->ext.actual) == FAILURE)
     return FAILURE;
 
-  if (c->resolved_sym != NULL)
-    return SUCCESS;
-
-  switch (procedure_kind (c->symtree->n.sym))
-    {
-    case PTYPE_GENERIC:
-      t = resolve_generic_s (c);
-      break;
+  t = SUCCESS;
+  if (c->resolved_sym == NULL)
+    switch (procedure_kind (c->symtree->n.sym))
+      {
+      case PTYPE_GENERIC:
+	t = resolve_generic_s (c);
+	break;
 
-    case PTYPE_SPECIFIC:
-      t = resolve_specific_s (c);
-      break;
+      case PTYPE_SPECIFIC:
+	t = resolve_specific_s (c);
+	break;
 
-    case PTYPE_UNKNOWN:
-      t = resolve_unknown_s (c);
-      break;
+      case PTYPE_UNKNOWN:
+	t = resolve_unknown_s (c);
+	break;
 
-    default:
-      gfc_internal_error ("resolve_subroutine(): bad function type");
-    }
+      default:
+	gfc_internal_error ("resolve_subroutine(): bad function type");
+      }
 
+  if (t == SUCCESS)
+    find_noncopying_intrinsics (c->resolved_sym, c->ext.actual);
   return t;
 }
 
Index: gcc/fortran/trans-array.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/fortran/trans-array.c,v
retrieving revision 1.62
diff -u -3 -p -r1.62 trans-array.c
--- gcc/fortran/trans-array.c	13 Sep 2005 08:07:15 -0000	1.62
+++ gcc/fortran/trans-array.c	13 Sep 2005 19:02:18 -0000
@@ -816,6 +816,94 @@ gfc_get_array_constructor_size (mpz_t * 
 }
 
 
+/* Generate code to tranpose array EXPR by creating a new descriptor
+   in which the dimension specifications have been reversed.  */
+
+void
+gfc_conv_array_transpose (gfc_se * se, gfc_expr * expr)
+{
+  tree dest, src, dest_index, src_index;
+  gfc_loopinfo *loop;
+  gfc_ss_info *dest_info, *src_info;
+  gfc_ss *dest_ss, *src_ss;
+  gfc_se src_se;
+  int n;
+
+  loop = se->loop;
+
+  src_ss = gfc_walk_expr (expr);
+  dest_ss = se->ss;
+
+  src_info = &src_ss->data.info;
+  dest_info = &dest_ss->data.info;
+
+  /* Get a descriptor for EXPR.  */
+  gfc_init_se (&src_se, NULL);
+  gfc_conv_expr_descriptor (&src_se, expr, src_ss);
+  gfc_add_block_to_block (&se->pre, &src_se.pre);
+  gfc_add_block_to_block (&se->post, &src_se.post);
+  src = src_se.expr;
+
+  /* Allocate a new descriptor for the return value.  */
+  dest = gfc_create_var (TREE_TYPE (src), "atmp");
+  dest_info->descriptor = dest;
+  se->expr = dest;
+
+  /* Copy across the dtype field.  */
+  gfc_add_modify_expr (&se->pre,
+		       gfc_conv_descriptor_dtype (dest),
+		       gfc_conv_descriptor_dtype (src));
+
+  /* Copy the dimension information, renumbering dimension 1 to 0 and
+     0 to 1.  */
+  gcc_assert (dest_info->dimen == 2);
+  gcc_assert (src_info->dimen == 2);
+  for (n = 0; n < 2; n++)
+    {
+      dest_info->delta[n] = integer_zero_node;
+      dest_info->start[n] = integer_zero_node;
+      dest_info->stride[n] = integer_one_node;
+      dest_info->dim[n] = n;
+
+      dest_index = gfc_rank_cst[n];
+      src_index = gfc_rank_cst[1 - n];
+
+      gfc_add_modify_expr (&se->pre,
+			   gfc_conv_descriptor_stride (dest, dest_index),
+			   gfc_conv_descriptor_stride (src, src_index));
+
+      gfc_add_modify_expr (&se->pre,
+			   gfc_conv_descriptor_lbound (dest, dest_index),
+			   gfc_conv_descriptor_lbound (src, src_index));
+
+      gfc_add_modify_expr (&se->pre,
+			   gfc_conv_descriptor_ubound (dest, dest_index),
+			   gfc_conv_descriptor_ubound (src, src_index));
+
+      if (!loop->to[n])
+        {
+	  gcc_assert (integer_zerop (loop->from[n]));
+	  loop->to[n] = build2 (MINUS_EXPR, gfc_array_index_type,
+				gfc_conv_descriptor_ubound (dest, dest_index),
+				gfc_conv_descriptor_lbound (dest, dest_index));
+        }
+    }
+
+  /* Copy the data pointer.  */
+  dest_info->data = gfc_conv_descriptor_data_get (src);
+  gfc_conv_descriptor_data_set (&se->pre, dest, dest_info->data);
+
+  /* Copy the offset.  This is not changed by transposition: the top-left
+     element is still at the same offset as before.  */
+  dest_info->offset = gfc_conv_descriptor_offset (src);
+  gfc_add_modify_expr (&se->pre,
+		       gfc_conv_descriptor_offset (dest),
+		       dest_info->offset);
+
+  if (dest_info->dimen > loop->temp_dim)
+    loop->temp_dim = dest_info->dimen;
+}
+
 /* Make sure offset is a variable.  */
 
 static void
Index: gcc/fortran/trans-array.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/fortran/trans-array.h,v
retrieving revision 1.14
diff -u -3 -p -r1.14 trans-array.h
--- gcc/fortran/trans-array.h	9 Sep 2005 06:22:27 -0000	1.14
+++ gcc/fortran/trans-array.h	13 Sep 2005 19:02:18 -0000
@@ -91,6 +91,8 @@ void gfc_conv_tmp_ref (gfc_se *);
 void gfc_conv_expr_descriptor (gfc_se *, gfc_expr *, gfc_ss *);
 /* Convert an array for passing as an actual function parameter.  */
 void gfc_conv_array_parameter (gfc_se *, gfc_expr *, gfc_ss *, int);
+/* Evaluate and transpose a matrix expression.  */
+void gfc_conv_array_transpose (gfc_se *, gfc_expr *);
 
 /* These work with both descriptors and descriptorless arrays.  */
 tree gfc_conv_array_data (tree);
@@ -112,8 +114,6 @@ tree gfc_conv_descriptor_ubound (tree, t
 
 /* Dependency checking for WHERE and FORALL.  */
 int gfc_check_dependency (gfc_expr *, gfc_expr *, gfc_expr **, int);
-/* Dependency checking for function calls.  */
-int gfc_check_fncall_dependency (gfc_expr *, gfc_expr *);
 
 /* Add pre-loop scalarization code for intrinsic functions which require
    special handling.  */
Index: gcc/fortran/trans-expr.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/fortran/trans-expr.c,v
retrieving revision 1.63
diff -u -3 -p -r1.63 trans-expr.c
--- gcc/fortran/trans-expr.c	9 Sep 2005 06:34:07 -0000	1.63
+++ gcc/fortran/trans-expr.c	13 Sep 2005 19:02:19 -0000
@@ -2581,7 +2581,9 @@ gfc_trans_arrayfunc_assign (gfc_expr * e
     return NULL;
 
   /* Check for a dependency.  */
-  if (gfc_check_fncall_dependency (expr1, expr2))
+  if (gfc_check_fncall_dependency (expr1, INTENT_OUT,
+				   expr2->value.function.esym,
+				   expr2->value.function.actual))
     return NULL;
 
   /* The frontend doesn't seem to bother filling in expr->symtree for intrinsic
Index: gcc/fortran/trans-intrinsic.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/fortran/trans-intrinsic.c,v
retrieving revision 1.54
diff -u -3 -p -r1.54 trans-intrinsic.c
--- gcc/fortran/trans-intrinsic.c	13 Sep 2005 08:07:15 -0000	1.54
+++ gcc/fortran/trans-intrinsic.c	13 Sep 2005 19:02:20 -0000
@@ -2689,7 +2689,7 @@ gfc_conv_intrinsic_function (gfc_se * se
 
   name = &expr->value.function.name[2];
 
-  if (expr->rank > 0)
+  if (expr->rank > 0 && !expr->inline_noncopying_intrinsic)
     {
       lib = gfc_is_intrinsic_libcall (expr);
       if (lib != 0)
@@ -2898,6 +2898,16 @@ gfc_conv_intrinsic_function (gfc_se * se
       gfc_conv_intrinsic_bound (se, expr, 0);
       break;
 
+    case GFC_ISYM_TRANSPOSE:
+      if (se->ss && se->ss->useflags)
+	{
+	  gfc_conv_tmp_array_ref (se);
+	  gfc_advance_se_ss_chain (se);
+	}
+      else
+	gfc_conv_array_transpose (se, expr->value.function.actual->expr);
+      break;
+
     case GFC_ISYM_LEN:
       gfc_conv_intrinsic_len (se, expr);
       break;
Index: libgfortran/m4/matmul.m4
===================================================================
RCS file: /cvs/gcc/gcc/libgfortran/m4/matmul.m4,v
retrieving revision 1.15
diff -u -3 -p -r1.15 matmul.m4
--- libgfortran/m4/matmul.m4	17 Aug 2005 02:49:05 -0000	1.15
+++ libgfortran/m4/matmul.m4	13 Sep 2005 19:02:32 -0000
@@ -35,16 +35,29 @@ Boston, MA 02110-1301, USA.  */
 #include "libgfortran.h"'
 include(iparm.m4)dnl
 
-/* This is a C version of the following fortran pseudo-code. The key
-   point is the loop order -- we access all arrays column-first, which
-   improves the performance enough to boost galgel spec score by 50%.
+/* The order of loops is different in the case of plain matrix
+   multiplication C=MATMUL(A,B), and in the frequent special case where
+   the argument A is the temporary result of a TRANSPOSE intrinsic:
+   C=MATMUL(TRANSPOSE(A),B).  Transposed temporaries are detected by
+   looking at their strides.
+
+   The equivalent Fortran pseudo-code is:
 
    DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
-   C = 0
-   DO J=1,N
-     DO K=1,COUNT
+   IF (.NOT.IS_TRANSPOSED(A)) THEN
+     C = 0
+     DO J=1,N
+       DO K=1,COUNT
+         DO I=1,M
+           C(I,J) = C(I,J)+A(I,K)*B(K,J)
+   ELSE
+     DO J=1,N
        DO I=1,M
-         C(I,J) = C(I,J)+A(I,K)*B(K,J)
+         S = 0
+         DO K=1,COUNT
+           S = S+A(I,K)+B(K,J)
+         C(I,J) = S
+   ENDIF
 */
 
 extern void matmul_`'rtype_code (rtype * retarray, rtype * a, rtype * b);
@@ -204,7 +217,28 @@ sinclude(`matmul_asm_'rtype_code`.m4')dn
 	    }
 	}
     }
-  else
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      rtype_name *bbase_y;
+      rtype_name *dest_y;
+      rtype_name *abase_x;
+      rtype_name s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (rtype_name) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n] * bbase_y[n];
+	      dest_y[x] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
     {
       for (y = 0; y < ycount; y++)
 	for (x = 0; x < xcount; x++)
@@ -216,4 +250,25 @@ sinclude(`matmul_asm_'rtype_code`.m4')dn
 	    /* dest[x,y] += a[x,n] * b[n,y] */
 	    dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
     }
+  else
+    {
+      rtype_name *bbase_y;
+      rtype_name *dest_y;
+      rtype_name *abase_x;
+      rtype_name s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (rtype_name) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
 }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]