This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Improve Fortran non-constant step DO translation (PR fortran/37536)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org, fortran at redhat dot com
- Date: Wed, 17 Sep 2008 11:49:18 -0400
- Subject: [PATCH] Improve Fortran non-constant step DO translation (PR fortran/37536)
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
When it is unknown whether DO step is positive or negative and DOVAR
has INTEGER_TYPE, Fortran generates unnecessarily complicated code.
if (D.1533 > 0) goto <D.1542>; else goto <D.1543>;
<D.1542>:
D.1544 = D.1532 - e.2;
iftmp.1 = (character(kind=4)) D.1544;
goto <D.1545>;
<D.1543>:
D.1546 = e.2 - D.1532;
iftmp.1 = (character(kind=4)) D.1546;
<D.1545>:
D.1547 = ABS_EXPR <D.1533>;
D.1548 = (character(kind=4)) D.1547;
countm1.0 = iftmp.1 / D.1548;
e = e.2;
if (D.1533 > 0) goto <D.1550>; else goto <D.1551>;
<D.1550>:
iftmp.3 = D.1532 < e.2;
goto <D.1552>;
<D.1551>:
iftmp.3 = D.1532 > e.2;
<D.1552>:
if (iftmp.3 != 0) goto L.2; else goto <D.1553>;
<D.1553>:
<D.1554>:
loop... branch to L.2 when done
<goto D.1554>;
<L.2>:
which e.g. leads on most arches into storing a comparison flag
into a variable and then comparing it, which is on many arches costly.
With the patch below, we generate instead:
e = e.1;
if (D.1533 > 0) goto <D.1541>; else goto <D.1542>;
<D.1541>:
if (D.1532 < e.1) goto L.2; else goto <D.1543>;
<D.1543>:
D.1544 = D.1532 - e.1;
D.1545 = (character(kind=4)) D.1544;
D.1546 = (character(kind=4)) D.1533;
countm1.0 = D.1545 / D.1546;
goto <D.1547>;
<D.1542>:
if (D.1532 > e.1) goto L.2; else goto <D.1548>;
<D.1548>:
D.1549 = e.1 - D.1532;
D.1550 = (character(kind=4)) D.1549;
D.1551 = -D.1533;
D.1552 = (character(kind=4)) D.1551;
countm1.0 = D.1550 / D.1552;
<D.1547>:
<D.1553>:
loop... branch to L.2 when done
<goto <D.1553>;
<L.2>:
i.e. we merge the 3 conditionals with step > 0 condition (to - from vs.
from - to, ABS_EXPR (step) and the empty loop condition)
into one bigger one.
Bootstrapped/regtested on x86_64-linux, on the given testcase smaller code
is generated on x86_64-linux at least.
In the PR Luis said the patch improved slightly swim on ppc32 and art on
ppc64.
Ok for trunk?
2008-09-16 Jakub Jelinek <jakub@redhat.com>
PR fortran/37536
* trans-stmt.c (gfc_trans_do): Optimize integer type non-simple
do loop initialization.
--- gcc/fortran/trans-stmt.c.jj 2008-09-05 12:55:53.000000000 +0200
+++ gcc/fortran/trans-stmt.c 2008-09-16 14:40:00.000000000 +0200
@@ -834,7 +834,6 @@ gfc_trans_do (gfc_code * code)
tree from;
tree to;
tree step;
- tree empty;
tree countm1;
tree type;
tree utype;
@@ -875,56 +874,88 @@ gfc_trans_do (gfc_code * code)
&& (integer_onep (step)
|| tree_int_cst_equal (step, integer_minus_one_node)))
return gfc_trans_simple_do (code, &block, dovar, from, to, step);
-
- /* We need a special check for empty loops:
- empty = (step > 0 ? to < from : to > from); */
+
pos_step = fold_build2 (GT_EXPR, boolean_type_node, step,
fold_convert (type, integer_zero_node));
- empty = fold_build3 (COND_EXPR, boolean_type_node, pos_step,
- fold_build2 (LT_EXPR, boolean_type_node, to, from),
- fold_build2 (GT_EXPR, boolean_type_node, to, from));
- /* Initialize loop count. This code is executed before we enter the
- loop body. We generate: countm1 = abs(to - from) / abs(step). */
+ if (TREE_CODE (type) == INTEGER_TYPE)
+ utype = unsigned_type_for (type);
+ else
+ utype = unsigned_type_for (gfc_array_index_type);
+ countm1 = gfc_create_var (utype, "countm1");
+
+ /* Cycle and exit statements are implemented with gotos. */
+ cycle_label = gfc_build_label_decl (NULL_TREE);
+ exit_label = gfc_build_label_decl (NULL_TREE);
+ TREE_USED (exit_label) = 1;
+
+ /* Initialize the DO variable: dovar = from. */
+ gfc_add_modify (&block, dovar, from);
+
+ /* Initialize loop count and jump to exit label if the loop is empty.
+ This code is executed before we enter the loop body. We generate:
+ if (step > 0)
+ {
+ if (to < from) goto exit_label;
+ countm1 = to - from / step;
+ }
+ else
+ {
+ if (to > from) goto exit_label;
+ countm1 = from - to / -step;
+ } */
if (TREE_CODE (type) == INTEGER_TYPE)
{
- tree ustep;
+ tree pos, neg;
- utype = unsigned_type_for (type);
+ tmp = fold_build2 (LT_EXPR, boolean_type_node, to, from);
+ pos = fold_build3 (COND_EXPR, void_type_node, tmp,
+ build1_v (GOTO_EXPR, exit_label),
+ build_empty_stmt ());
+ tmp = fold_build2 (MINUS_EXPR, type, to, from);
+ tmp = fold_convert (utype, tmp);
+ tmp = fold_build2 (TRUNC_DIV_EXPR, utype, tmp,
+ fold_convert (utype, step));
+ tmp = build2 (MODIFY_EXPR, void_type_node, countm1, tmp);
+ pos = build2 (COMPOUND_EXPR, void_type_node, pos, tmp);
+
+ tmp = fold_build2 (GT_EXPR, boolean_type_node, to, from);
+ neg = fold_build3 (COND_EXPR, void_type_node, tmp,
+ build1_v (GOTO_EXPR, exit_label),
+ build_empty_stmt ());
+ tmp = fold_build2 (MINUS_EXPR, type, from, to);
+ tmp = fold_convert (utype, tmp);
+ tmp = fold_build2 (TRUNC_DIV_EXPR, utype, tmp,
+ fold_convert (utype, fold_build1 (NEGATE_EXPR,
+ type, step)));
+ tmp = build2 (MODIFY_EXPR, void_type_node, countm1, tmp);
+ neg = build2 (COMPOUND_EXPR, void_type_node, neg, tmp);
- /* tmp = abs(to - from) / abs(step) */
- ustep = fold_convert (utype, fold_build1 (ABS_EXPR, type, step));
- tmp = fold_build3 (COND_EXPR, type, pos_step,
- fold_build2 (MINUS_EXPR, type, to, from),
- fold_build2 (MINUS_EXPR, type, from, to));
- tmp = fold_build2 (TRUNC_DIV_EXPR, utype, fold_convert (utype, tmp),
- ustep);
+ tmp = fold_build3 (COND_EXPR, void_type_node, pos_step, pos, neg);
+ gfc_add_expr_to_block (&block, tmp);
}
else
{
/* TODO: We could use the same width as the real type.
This would probably cause more problems that it solves
when we implement "long double" types. */
- utype = unsigned_type_for (gfc_array_index_type);
+
tmp = fold_build2 (MINUS_EXPR, type, to, from);
tmp = fold_build2 (RDIV_EXPR, type, tmp, step);
tmp = fold_build1 (FIX_TRUNC_EXPR, utype, tmp);
- }
- countm1 = gfc_create_var (utype, "countm1");
- gfc_add_modify (&block, countm1, tmp);
-
- /* Cycle and exit statements are implemented with gotos. */
- cycle_label = gfc_build_label_decl (NULL_TREE);
- exit_label = gfc_build_label_decl (NULL_TREE);
- TREE_USED (exit_label) = 1;
-
- /* Initialize the DO variable: dovar = from. */
- gfc_add_modify (&block, dovar, from);
+ gfc_add_modify (&block, countm1, tmp);
- /* If the loop is empty, go directly to the exit label. */
- tmp = fold_build3 (COND_EXPR, void_type_node, empty,
- build1_v (GOTO_EXPR, exit_label), build_empty_stmt ());
- gfc_add_expr_to_block (&block, tmp);
+ /* We need a special check for empty loops:
+ empty = (step > 0 ? to < from : to > from); */
+ tmp = fold_build3 (COND_EXPR, boolean_type_node, pos_step,
+ fold_build2 (LT_EXPR, boolean_type_node, to, from),
+ fold_build2 (GT_EXPR, boolean_type_node, to, from));
+ /* If the loop is empty, go directly to the exit label. */
+ tmp = fold_build3 (COND_EXPR, void_type_node, tmp,
+ build1_v (GOTO_EXPR, exit_label),
+ build_empty_stmt ());
+ gfc_add_expr_to_block (&block, tmp);
+ }
/* Loop body. */
gfc_start_block (&body);
Jakub