This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[Patch, fortran] [62..66/66] inline sum and product: Inline sum.
- From: Mikael Morin <mikael dot morin at sfr dot fr>
- To: gfortran <fortran at gcc dot gnu dot org> , GCC patches <gcc-patches at gcc dot gnu dot org>
- Date: Fri, 28 Oct 2011 01:33:39 +0200 (CEST)
- Subject: [Patch, fortran] [62..66/66] inline sum and product: Inline sum.
- References: <20111027232818.18581.901@gimli.local>
This is the purpose of the whole series of patches: inlining sum and product.
Patches 62..65 possibly don't deserve patches of their own; they just make
the changes in gfc_conv_intrinsic_arith easier to read.
The combined diff (context format, ignoring indentation changes) is also attached here.
Patch 62: Change conditions.
Patch 63: Update argument handling.
Patch 64: Change loop usage.
Patch 65: Change gfc_se structs initializers.
Patch 66: Inline sum.
diff --git a/trans-array.c b/trans-array.c
index acd9aec18fefc6631ad443c5dff2d3014a9d6565..262743d0d3779b4f02a63d604bd9a621401ae84e 100644
*** a/trans-array.c
--- b/trans-array.c
*************** gfc_conv_expr_descriptor (gfc_se * se, g
*** 6187,6193 ****
gcc_assert ((expr->value.function.esym != NULL
&& expr->value.function.esym->attr.elemental)
|| (expr->value.function.isym != NULL
! && expr->value.function.isym->elemental));
else
gcc_assert (ss_type == GFC_SS_INTRINSIC);
--- 6187,6194 ----
gcc_assert ((expr->value.function.esym != NULL
&& expr->value.function.esym->attr.elemental)
|| (expr->value.function.isym != NULL
! && expr->value.function.isym->elemental)
! || gfc_inline_intrinsic_function_p (expr));
else
gcc_assert (ss_type == GFC_SS_INTRINSIC);
diff --git a/trans-intrinsic.c b/trans-intrinsic.c
index 3cdc1e0970aa4f787a60c96e50572a0bb1cb2a6e..973f912a624bdd442b3851471432c358118438d6 100644
*** a/trans-intrinsic.c
--- b/trans-intrinsic.c
*************** gfc_conv_intrinsic_count (gfc_se * se, g
*** 2557,2562 ****
--- 2557,2576 ----
se->expr = resvar;
}
+
+ /* Update given gfc_se to have ss component pointing to the nested gfc_ss
+ struct and return the corresponding loopinfo. */
+
+ static gfc_loopinfo *
+ enter_nested_loop (gfc_se *se)
+ {
+ se->ss = se->ss->nested_ss;
+ gcc_assert (se->ss == se->ss->loop->ss);
+
+ return se->ss->loop;
+ }
+
+
/* Inline implementation of the sum and product intrinsics. */
static void
gfc_conv_intrinsic_arith (gfc_se * se, gfc_expr * expr, enum tree_code op,
*************** gfc_conv_intrinsic_arith (gfc_se * se, g
*** 2568,2587 ****
stmtblock_t body;
stmtblock_t block;
tree tmp;
! gfc_loopinfo loop;
! gfc_actual_arglist *actual;
! gfc_ss *arrayss;
! gfc_ss *maskss;
gfc_se arrayse;
gfc_se maskse;
gfc_expr *arrayexpr;
gfc_expr *maskexpr;
! if (se->ss)
{
! gfc_conv_intrinsic_funcall (se, expr);
! return;
}
type = gfc_typenode_for_spec (&expr->ts);
/* Initialize the result. */
--- 2582,2604 ----
stmtblock_t body;
stmtblock_t block;
tree tmp;
! gfc_loopinfo loop, *ploop;
! gfc_actual_arglist *arg_array, *arg_mask;
! gfc_ss *arrayss = NULL;
! gfc_ss *maskss = NULL;
gfc_se arrayse;
gfc_se maskse;
+ gfc_se *parent_se;
gfc_expr *arrayexpr;
gfc_expr *maskexpr;
! if (expr->rank > 0)
{
! gcc_assert (gfc_inline_intrinsic_function_p (expr));
! parent_se = se;
}
+ else
+ parent_se = NULL;
type = gfc_typenode_for_spec (&expr->ts);
/* Initialize the result. */
*************** gfc_conv_intrinsic_arith (gfc_se * se, g
*** 2608,2630 ****
gfc_add_modify (&se->pre, resvar, tmp);
! /* Walk the arguments. */
! actual = expr->value.function.actual;
! arrayexpr = actual->expr;
! arrayss = gfc_walk_expr (arrayexpr);
! gcc_assert (arrayss != gfc_ss_terminator);
if (op == NE_EXPR || norm2)
/* PARITY and NORM2. */
maskexpr = NULL;
else
{
! actual = actual->next->next;
! gcc_assert (actual);
! maskexpr = actual->expr;
}
! if (maskexpr && maskexpr->rank != 0)
{
maskss = gfc_walk_expr (maskexpr);
gcc_assert (maskss != gfc_ss_terminator);
--- 2625,2651 ----
gfc_add_modify (&se->pre, resvar, tmp);
! arg_array = expr->value.function.actual;
!
! arrayexpr = arg_array->expr;
if (op == NE_EXPR || norm2)
/* PARITY and NORM2. */
maskexpr = NULL;
else
{
! arg_mask = arg_array->next->next;
! gcc_assert (arg_mask != NULL);
! maskexpr = arg_mask->expr;
}
! if (expr->rank == 0)
! {
! /* Walk the arguments. */
! arrayss = gfc_walk_expr (arrayexpr);
! gcc_assert (arrayss != gfc_ss_terminator);
!
! if (maskexpr && maskexpr->rank > 0)
{
maskss = gfc_walk_expr (maskexpr);
gcc_assert (maskss != gfc_ss_terminator);
*************** gfc_conv_intrinsic_arith (gfc_se * se, g
*** 2635,2641 ****
/* Initialize the scalarizer. */
gfc_init_loopinfo (&loop);
gfc_add_ss_to_loop (&loop, arrayss);
! if (maskss)
gfc_add_ss_to_loop (&loop, maskss);
/* Initialize the loop. */
--- 2656,2662 ----
/* Initialize the scalarizer. */
gfc_init_loopinfo (&loop);
gfc_add_ss_to_loop (&loop, arrayss);
! if (maskexpr && maskexpr->rank > 0)
gfc_add_ss_to_loop (&loop, maskss);
/* Initialize the loop. */
*************** gfc_conv_intrinsic_arith (gfc_se * se, g
*** 2643,2658 ****
gfc_conv_loop_setup (&loop, &expr->where);
gfc_mark_ss_chain_used (arrayss, 1);
! if (maskss)
gfc_mark_ss_chain_used (maskss, 1);
/* Generate the loop body. */
! gfc_start_scalarized_body (&loop, &body);
/* If we have a mask, only add this element if the mask is set. */
! if (maskss)
{
! gfc_init_se (&maskse, NULL);
! gfc_copy_loopinfo_to_se (&maskse, &loop);
maskse.ss = maskss;
gfc_conv_expr_val (&maskse, maskexpr);
gfc_add_block_to_block (&body, &maskse.pre);
--- 2664,2689 ----
gfc_conv_loop_setup (&loop, &expr->where);
gfc_mark_ss_chain_used (arrayss, 1);
! if (maskexpr && maskexpr->rank > 0)
gfc_mark_ss_chain_used (maskss, 1);
+
+ ploop = &loop;
+ }
+ else
+ /* All the work has been done in the parent loops. */
+ ploop = enter_nested_loop (se);
+
+ gcc_assert (ploop);
+
/* Generate the loop body. */
! gfc_start_scalarized_body (ploop, &body);
/* If we have a mask, only add this element if the mask is set. */
! if (maskexpr && maskexpr->rank > 0)
{
! gfc_init_se (&maskse, parent_se);
! gfc_copy_loopinfo_to_se (&maskse, ploop);
! if (expr->rank == 0)
maskse.ss = maskss;
gfc_conv_expr_val (&maskse, maskexpr);
gfc_add_block_to_block (&body, &maskse.pre);
*************** gfc_conv_intrinsic_arith (gfc_se * se, g
*** 2663,2670 ****
gfc_init_block (&block);
/* Do the actual summation/product. */
! gfc_init_se (&arrayse, NULL);
! gfc_copy_loopinfo_to_se (&arrayse, &loop);
arrayse.ss = arrayss;
gfc_conv_expr_val (&arrayse, arrayexpr);
gfc_add_block_to_block (&block, &arrayse.pre);
--- 2694,2702 ----
gfc_init_block (&block);
/* Do the actual summation/product. */
! gfc_init_se (&arrayse, parent_se);
! gfc_copy_loopinfo_to_se (&arrayse, ploop);
! if (expr->rank == 0)
arrayse.ss = arrayss;
gfc_conv_expr_val (&arrayse, arrayexpr);
gfc_add_block_to_block (&block, &arrayse.pre);
*************** gfc_conv_intrinsic_arith (gfc_se * se, g
*** 2740,2746 ****
gfc_add_block_to_block (&block, &arrayse.post);
! if (maskss)
{
/* We enclose the above in if (mask) {...} . */
--- 2772,2778 ----
gfc_add_block_to_block (&block, &arrayse.post);
! if (maskexpr && maskexpr->rank > 0)
{
/* We enclose the above in if (mask) {...} . */
*************** gfc_conv_intrinsic_arith (gfc_se * se, g
*** 2752,2781 ****
tmp = gfc_finish_block (&block);
gfc_add_expr_to_block (&body, tmp);
! gfc_trans_scalarizing_loops (&loop, &body);
/* For a scalar mask, enclose the loop in an if statement. */
! if (maskexpr && maskss == NULL)
{
- gfc_init_se (&maskse, NULL);
- gfc_conv_expr_val (&maskse, maskexpr);
gfc_init_block (&block);
! gfc_add_block_to_block (&block, &loop.pre);
! gfc_add_block_to_block (&block, &loop.post);
tmp = gfc_finish_block (&block);
tmp = build3_v (COND_EXPR, maskse.expr, tmp,
build_empty_stmt (input_location));
gfc_add_expr_to_block (&block, tmp);
gfc_add_block_to_block (&se->pre, &block);
}
else
{
! gfc_add_block_to_block (&se->pre, &loop.pre);
! gfc_add_block_to_block (&se->pre, &loop.post);
}
! gfc_cleanup_loop (&loop);
if (norm2)
{
--- 2784,2826 ----
tmp = gfc_finish_block (&block);
gfc_add_expr_to_block (&body, tmp);
! gfc_trans_scalarizing_loops (ploop, &body);
/* For a scalar mask, enclose the loop in an if statement. */
! if (maskexpr && maskexpr->rank == 0)
{
gfc_init_block (&block);
! gfc_add_block_to_block (&block, &ploop->pre);
! gfc_add_block_to_block (&block, &ploop->post);
tmp = gfc_finish_block (&block);
+ if (expr->rank > 0)
+ {
+ tmp = build3_v (COND_EXPR, se->ss->info->data.scalar.value, tmp,
+ build_empty_stmt (input_location));
+ gfc_advance_se_ss_chain (se);
+ }
+ else
+ {
+ gcc_assert (expr->rank == 0);
+ gfc_init_se (&maskse, NULL);
+ gfc_conv_expr_val (&maskse, maskexpr);
tmp = build3_v (COND_EXPR, maskse.expr, tmp,
build_empty_stmt (input_location));
+ }
+
gfc_add_expr_to_block (&block, tmp);
gfc_add_block_to_block (&se->pre, &block);
+ gcc_assert (se->post.head == NULL);
}
else
{
! gfc_add_block_to_block (&se->pre, &ploop->pre);
! gfc_add_block_to_block (&se->pre, &ploop->post);
}
! if (expr->rank == 0)
! gfc_cleanup_loop (ploop);
if (norm2)
{
*************** walk_inline_intrinsic_transpose (gfc_ss
*** 6795,6806 ****
--- 6840,6966 ----
}
+ /* Move the given dimension of the given gfc_ss list to a nested gfc_ss list.
+ This has the side effect of reversing the nested list, so there is no
+ need to call gfc_reverse_ss on it (the given list is assumed not to be
+ reversed yet). */
+
+ static gfc_ss *
+ nest_loop_dimension (gfc_ss *ss, int dim)
+ {
+ int ss_dim, i;
+ gfc_ss *new_ss, *prev_ss = gfc_ss_terminator;
+ gfc_loopinfo *new_loop;
+
+ gcc_assert (ss != gfc_ss_terminator);
+
+ for (; ss != gfc_ss_terminator; ss = ss->next)
+ {
+ new_ss = gfc_get_ss ();
+ new_ss->next = prev_ss;
+ new_ss->parent = ss;
+ new_ss->info = ss->info;
+ new_ss->info->refcount++;
+ if (ss->dimen != 0)
+ {
+ gcc_assert (ss->info->type != GFC_SS_SCALAR
+ && ss->info->type != GFC_SS_REFERENCE);
+
+ new_ss->dimen = 1;
+ new_ss->dim[0] = ss->dim[dim];
+
+ gcc_assert (dim < ss->dimen);
+
+ ss_dim = --ss->dimen;
+ for (i = dim; i < ss_dim; i++)
+ ss->dim[i] = ss->dim[i + 1];
+
+ ss->dim[ss_dim] = 0;
+ }
+ prev_ss = new_ss;
+
+ if (ss->nested_ss)
+ {
+ ss->nested_ss->parent = new_ss;
+ new_ss->nested_ss = ss->nested_ss;
+ }
+ ss->nested_ss = new_ss;
+ }
+
+ new_loop = gfc_get_loopinfo ();
+ gfc_init_loopinfo (new_loop);
+
+ gcc_assert (prev_ss != NULL);
+ gcc_assert (prev_ss != gfc_ss_terminator);
+ gfc_add_ss_to_loop (new_loop, prev_ss);
+ return new_ss->parent;
+ }
+
+
+ /* Create the gfc_ss list for the SUM/PRODUCT arguments when the function
+ is to be inlined. */
+
+ static gfc_ss *
+ walk_inline_intrinsic_arith (gfc_ss *ss, gfc_expr *expr)
+ {
+ gfc_ss *tmp_ss, *tail, *array_ss;
+ gfc_actual_arglist *arg1, *arg2, *arg3;
+ int sum_dim;
+ bool scalar_mask = false;
+
+ /* The rank of the result will be determined later. */
+ arg1 = expr->value.function.actual;
+ arg2 = arg1->next;
+ arg3 = arg2->next;
+ gcc_assert (arg3 != NULL);
+
+ if (expr->rank == 0)
+ return ss;
+
+ tmp_ss = gfc_ss_terminator;
+
+ if (arg3->expr)
+ {
+ gfc_ss *mask_ss;
+
+ mask_ss = gfc_walk_subexpr (tmp_ss, arg3->expr);
+ if (mask_ss == tmp_ss)
+ scalar_mask = 1;
+
+ tmp_ss = mask_ss;
+ }
+
+ array_ss = gfc_walk_subexpr (tmp_ss, arg1->expr);
+ gcc_assert (array_ss != tmp_ss);
+
+ /* Odd thing: If the mask is scalar, it is used by the frontend after
+ the array (to make it array around the nested loop). Thus it shall
+ be after array_ss once the gfc_ss list is reversed. */
+ if (scalar_mask)
+ tmp_ss = gfc_get_scalar_ss (array_ss, arg3->expr);
+ else
+ tmp_ss = array_ss;
+
+ /* "Hide" the dimension on which we will sum in the first arg's scalarization
+ chain. */
+ sum_dim = mpz_get_si (arg2->expr->value.integer) - 1;
+ tail = nest_loop_dimension (tmp_ss, sum_dim);
+ tail->next = ss;
+
+ return tmp_ss;
+ }
+
+
static gfc_ss *
walk_inline_intrinsic_function (gfc_ss * ss, gfc_expr * expr)
{
switch (expr->value.function.isym->id)
{
+ case GFC_ISYM_PRODUCT:
+ case GFC_ISYM_SUM:
+ return walk_inline_intrinsic_arith (ss, expr);
+
case GFC_ISYM_TRANSPOSE:
return walk_inline_intrinsic_transpose (ss, expr);
*************** gfc_walk_intrinsic_libfunc (gfc_ss * ss,
*** 6862,6872 ****
--- 7022,7047 ----
bool
gfc_inline_intrinsic_function_p (gfc_expr *expr)
{
+ gfc_actual_arglist *args;
+
if (!expr->value.function.isym)
return false;
switch (expr->value.function.isym->id)
{
+ case GFC_ISYM_PRODUCT:
+ case GFC_ISYM_SUM:
+ /* Disable inline expansion if code size matters. */
+ if (optimize_size)
+ return false;
+
+ args = expr->value.function.actual;
+ /* We need to be able to subset the SUM argument at compile-time. */
+ if (args->next->expr && args->next->expr->expr_type != EXPR_CONSTANT)
+ return false;
+
+ return true;
+
case GFC_ISYM_TRANSPOSE:
return true;
diff --git a/trans.h b/trans.h
index 5757865b3a180a32e5baa320c56d235924df68dc..22033d38d157f5c85eba6fcb8ee92ab28dc22535 100644
*** a/trans.h
--- b/trans.h
*************** typedef struct gfc_loopinfo
*** 310,315 ****
--- 310,316 ----
}
gfc_loopinfo;
+ #define gfc_get_loopinfo() XCNEW (gfc_loopinfo)
/* Information about a symbol that has been shadowed by a temporary. */
typedef struct