This is the mail archive of the fortran@gcc.gnu.org mailing list for the GNU Fortran project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Inline DOT_PRODUCT revisited


See the original message from November 2005 here:
http://gcc.gnu.org/ml/gcc-patches/2005-11/msg00686.html

On Thursday 10 November 2005 09:13, Paul Thomas wrote:
> an inline version of DOT_PRODUCT, which is never slower than the
> original library version and is very much faster for small vectors.

Amazingly your patch still applied almost cleanly, and it seems to do
what it is supposed to do.  The test case you had for dot_product was
useless because it got optimized to an empty loop (which for whatever
reason was not removed itself).  The new test case is attached below.
As is the updated patch, which is just a re-diff.

I would _very_ much like to have this patch in GCC 4.1.1, but what do
others want?  I think the patch is very safe (assuming it is correct,
but I haven't looked at that much yet) and it implements dot_product
in the same way as every other Fortran compiler I have access to, so
it can't be such a bad idea ;-)


> It is proposed to retain the library version of the DOT_PRODUCT because
> the inline version does not work for types longer than 8 bytes, and for a
> future implementation where the library is used with the option -Os.

I'm not sure I'd want to keep the library function.  If we can implement
simple arithmetic for all types, then implementing dot_product should not
be a problem either.

Gr.
Steven

	PR fortran/24520
	* trans-intrinsic.c (gfc_conv_intrinsic_dot_product): New function to
	implement dot_product inline.
	(gfc_conv_intrinsic_function): Change call from library dot_product to
	inline version.

Index: trans-intrinsic.c
===================================================================
--- trans-intrinsic.c	(revision 111441)
+++ trans-intrinsic.c	(working copy)
@@ -1561,6 +1561,102 @@ gfc_conv_intrinsic_arith (gfc_se * se, g
   se->expr = resvar;
 }
 
+
+/* Inline implementation of the dot_product intrinsic. This function
+   is based on gfc_conv_intrinsic_arith (the previous function).  */
+static void
+gfc_conv_intrinsic_dot_product (gfc_se * se, gfc_expr * expr)
+{
+  tree resvar;
+  tree type;
+  stmtblock_t body;
+  stmtblock_t block;
+  tree tmp;
+  gfc_loopinfo loop;
+  gfc_actual_arglist *actual;
+  gfc_ss *arrayss1, *arrayss2;
+  gfc_se arrayse1, arrayse2;
+  gfc_expr *arrayexpr1, *arrayexpr2;
+
+  /* Use the library for kind > 8.  */
+  if (expr->ts.kind > 8)
+    {
+      gfc_conv_intrinsic_funcall (se, expr);
+      return;
+    }
+
+  type = gfc_typenode_for_spec (&expr->ts);
+
+  /* Initialize the result.  */
+  resvar = gfc_create_var (type, "val");
+  tmp = gfc_build_const (type, integer_zero_node);
+  gfc_add_modify_expr (&se->pre, resvar, tmp);
+
+  /* Walk argument #1.  */
+  actual = expr->value.function.actual;
+  arrayexpr1 = actual->expr;
+  arrayss1 = gfc_walk_expr (arrayexpr1);
+  gcc_assert (arrayss1 != gfc_ss_terminator);
+
+  /* Walk argument #2.  */
+  actual = actual->next;
+  arrayexpr2 = actual->expr;
+  arrayss2 = gfc_walk_expr (arrayexpr2);
+  gcc_assert (arrayss2 != gfc_ss_terminator);
+
+  /* Initialize the scalarizer.  */
+  gfc_init_loopinfo (&loop);
+  gfc_add_ss_to_loop (&loop, arrayss1);
+  gfc_add_ss_to_loop (&loop, arrayss2);
+
+  /* Initialize the loop.  */
+  gfc_conv_ss_startstride (&loop);
+  gfc_conv_loop_setup (&loop);
+
+  gfc_mark_ss_chain_used (arrayss1, 1);
+  gfc_mark_ss_chain_used (arrayss2, 1);
+
+  /* Generate the loop body.  */
+  gfc_start_scalarized_body (&loop, &body);
+  gfc_init_block (&block);
+
+  /* Make the tree expression for [conjg(]array1[)].  */
+  gfc_init_se (&arrayse1, NULL);
+  gfc_copy_loopinfo_to_se (&arrayse1, &loop);
+  arrayse1.ss = arrayss1;
+  gfc_conv_expr (&arrayse1, arrayexpr1);
+  if (expr->ts.type == BT_COMPLEX)
+    arrayse1.expr = build1 (CONJ_EXPR, type, arrayse1.expr);
+  gfc_add_block_to_block (&block, &arrayse1.pre);
+
+  /* Make the tree expression for array2.  */
+  gfc_init_se (&arrayse2, NULL);
+  gfc_copy_loopinfo_to_se (&arrayse2, &loop);
+  arrayse2.ss = arrayss2;
+  gfc_conv_expr (&arrayse2, arrayexpr2);
+  gfc_add_block_to_block (&block, &arrayse2.pre);
+
+  /* Do the actual product and sum.  */
+  tmp = build2 (MULT_EXPR, type, arrayse1.expr, arrayse2.expr);
+  tmp = build2 (PLUS_EXPR, type, resvar, tmp);
+  gfc_add_modify_expr (&block, resvar, tmp);
+
+  /* Finish up the loop block and the loop.  */
+  gfc_add_block_to_block (&block, &arrayse2.post);
+  gfc_add_block_to_block (&block, &arrayse1.post);
+
+  tmp = gfc_finish_block (&block);
+  gfc_add_expr_to_block (&body, tmp);
+
+  gfc_trans_scalarizing_loops (&loop, &body);
+  gfc_add_block_to_block (&se->pre, &loop.pre);
+  gfc_add_block_to_block (&se->pre, &loop.post);
+  gfc_cleanup_loop (&loop);
+
+  se->expr = resvar;
+}
+
+
 static void
 gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, int op)
 {
@@ -3102,6 +3198,10 @@ gfc_conv_intrinsic_function (gfc_se * se
       gfc_conv_intrinsic_dim (se, expr);
       break;
 
+    case GFC_ISYM_DOT_PRODUCT:
+      gfc_conv_intrinsic_dot_product (se, expr);
+      break;
+
     case GFC_ISYM_DPROD:
       gfc_conv_intrinsic_dprod (se, expr);
       break;
@@ -3271,7 +3371,6 @@ gfc_conv_intrinsic_function (gfc_se * se
       break;
 
     case GFC_ISYM_CHDIR:
-    case GFC_ISYM_DOT_PRODUCT:
     case GFC_ISYM_ETIME:
     case GFC_ISYM_FGET:
     case GFC_ISYM_FGETC:

Attachment: dotprod_tests.f90
Description: Text document


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]