This is the mail archive of the fortran@gcc.gnu.org mailing list for the GNU Fortran project.
| Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
|---|---|---|
| Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
| Other format: | [Raw text] | |
See the original message from November 2005 here: http://gcc.gnu.org/ml/gcc-patches/2005-11/msg00686.html

On Thursday 10 November 2005 09:13, Paul Thomas wrote:
> an inline version of DOT_PRODUCT, which is never slower than the
> original library version and is very much faster for small vectors.

Amazingly, your patch still applied almost cleanly, and it seems to do what it is supposed to do. The test case you had for dot_product was useless because it got optimized to an empty loop (which, for whatever reason, was not itself removed). The new test case is attached below, as is the updated patch, which is just a re-diff.

I would _very_ much like to have this patch in GCC 4.1.1, but what do others want? I think the patch is very safe (assuming it is correct, but I haven't looked at that much yet) and it implements dot_product in the same way as every other Fortran compiler I have access to, so it can't be such a bad idea ;-)

> It is proposed to retain the library version of the DOT_PRODUCT because
> the inline version does not work for types longer than 8 bytes and for a
> future implementation, where the library is used with the option -Os.

I'm not sure I'd want to keep the library function. If we can implement simple arithmetic for all types, then implementing dot_product should not be a problem either.

Gr. Steven
PR fortran/24520
* trans-intrinsic.c (gfc_conv_intrinsic_dot_product): New function to
implement dot_product inline.
(gfc_conv_intrinsic_function): Change call from library dot_product to
inline version.
Index: trans-intrinsic.c
===================================================================
--- trans-intrinsic.c (revision 111441)
+++ trans-intrinsic.c (working copy)
@@ -1561,6 +1561,102 @@ gfc_conv_intrinsic_arith (gfc_se * se, g
se->expr = resvar;
}
+
+/* Inline implementation of the dot_product intrinsic. This function
+ is based on gfc_conv_intrinsic_arith (the previous function).
+ A scalarized loop over both array arguments is emitted into SE->pre,
+ accumulating the element products into a temporary named "val", and
+ SE->expr is set to that temporary. When EXPR->ts.kind > 8 no inline
+ code is generated; gfc_conv_intrinsic_funcall is used instead.
+ NOTE(review): only BT_COMPLEX gets special treatment below, every
+ other type goes through MULT_EXPR and PLUS_EXPR. Confirm that this
+ is what is wanted for LOGICAL arguments. */
+static void
+gfc_conv_intrinsic_dot_product (gfc_se * se, gfc_expr * expr)
+{
+ tree resvar; /* Accumulator; becomes the value of SE. */
+ tree type; /* Tree type built from EXPR->ts. */
+ stmtblock_t body; /* Body of the scalarized loop. */
+ stmtblock_t block; /* Statements for one iteration. */
+ tree tmp;
+ gfc_loopinfo loop;
+ gfc_actual_arglist *actual;
+ gfc_ss *arrayss1, *arrayss2;
+ gfc_se arrayse1, arrayse2;
+ gfc_expr *arrayexpr1, *arrayexpr2;
+
+ /* Use the library for kind > 8: hand the whole call over to
+ gfc_conv_intrinsic_funcall and emit no inline code. */
+ if (expr->ts.kind > 8)
+ {
+ gfc_conv_intrinsic_funcall (se, expr);
+ return;
+ }
+
+ type = gfc_typenode_for_spec (&expr->ts);
+
+ /* Initialize the result: create the accumulator and assign it a zero
+ constant of the result type in SE->pre, ahead of the loop. */
+ resvar = gfc_create_var (type, "val");
+ tmp = gfc_build_const (type, integer_zero_node);
+ gfc_add_modify_expr (&se->pre, resvar, tmp);
+
+ /* Walk argument #1. The walk must not come back as the terminator;
+ presumably that would mean a scalar argument - TODO confirm. */
+ actual = expr->value.function.actual;
+ arrayexpr1 = actual->expr;
+ arrayss1 = gfc_walk_expr (arrayexpr1);
+ gcc_assert (arrayss1 != gfc_ss_terminator);
+
+ /* Walk argument #2, the next entry in the actual argument list. */
+ actual = actual->next;
+ arrayexpr2 = actual->expr;
+ arrayss2 = gfc_walk_expr (arrayexpr2);
+ gcc_assert (arrayss2 != gfc_ss_terminator);
+
+ /* Initialize the scalarizer. Both chains go into the same loop so
+ that the two arguments are traversed together. */
+ gfc_init_loopinfo (&loop);
+ gfc_add_ss_to_loop (&loop, arrayss1);
+ gfc_add_ss_to_loop (&loop, arrayss2);
+
+ /* Initialize the loop. */
+ gfc_conv_ss_startstride (&loop);
+ gfc_conv_loop_setup (&loop);
+
+ gfc_mark_ss_chain_used (arrayss1, 1);
+ gfc_mark_ss_chain_used (arrayss2, 1);
+
+ /* Generate the loop body. BLOCK collects the statements of one
+ iteration and is added to BODY once it is complete. */
+ gfc_start_scalarized_body (&loop, &body);
+ gfc_init_block (&block);
+
+ /* Make the tree expression for [conjg(]array1[)]. The CONJ_EXPR
+ wrapper is applied only when the result type is BT_COMPLEX. */
+ gfc_init_se (&arrayse1, NULL);
+ gfc_copy_loopinfo_to_se (&arrayse1, &loop);
+ arrayse1.ss = arrayss1;
+ gfc_conv_expr (&arrayse1, arrayexpr1);
+ if (expr->ts.type == BT_COMPLEX)
+ arrayse1.expr = build1 (CONJ_EXPR, type, arrayse1.expr);
+ gfc_add_block_to_block (&block, &arrayse1.pre);
+
+ /* Make the tree expression for array2. */
+ gfc_init_se (&arrayse2, NULL);
+ gfc_copy_loopinfo_to_se (&arrayse2, &loop);
+ arrayse2.ss = arrayss2;
+ gfc_conv_expr (&arrayse2, arrayexpr2);
+ gfc_add_block_to_block (&block, &arrayse2.pre);
+
+ /* Do the actual product and sum: val = val + array1 * array2. */
+ tmp = build2 (MULT_EXPR, type, arrayse1.expr, arrayse2.expr);
+ tmp = build2 (PLUS_EXPR, type, resvar, tmp);
+ gfc_add_modify_expr (&block, resvar, tmp);
+
+ /* Finish up the loop block and the loop. The post blocks are added
+ in the reverse order of the pre blocks (array2 first). */
+ gfc_add_block_to_block (&block, &arrayse2.post);
+ gfc_add_block_to_block (&block, &arrayse1.post);
+
+ tmp = gfc_finish_block (&block);
+ gfc_add_expr_to_block (&body, tmp);
+
+ gfc_trans_scalarizing_loops (&loop, &body);
+ gfc_add_block_to_block (&se->pre, &loop.pre); /* Loop setup code. */
+ gfc_add_block_to_block (&se->pre, &loop.post); /* Code after the loop. */
+ gfc_cleanup_loop (&loop);
+
+ se->expr = resvar; /* The accumulator is the value of the call. */
+}
+
+
static void
gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, int op)
{
@@ -3102,6 +3198,10 @@ gfc_conv_intrinsic_function (gfc_se * se
gfc_conv_intrinsic_dim (se, expr);
break;
+ case GFC_ISYM_DOT_PRODUCT:
+ gfc_conv_intrinsic_dot_product (se, expr);
+ break;
+
case GFC_ISYM_DPROD:
gfc_conv_intrinsic_dprod (se, expr);
break;
@@ -3271,7 +3371,6 @@ gfc_conv_intrinsic_function (gfc_se * se
break;
case GFC_ISYM_CHDIR:
- case GFC_ISYM_DOT_PRODUCT:
case GFC_ISYM_ETIME:
case GFC_ISYM_FGET:
case GFC_ISYM_FGETC:
Attachment:
dotprod_tests.f90
Description: Text document
| Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
|---|---|---|
| Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |