This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Improve Fortran non-constant step DO translation (PR fortran/37536)


Hi!

When it is unknown whether DO step is positive or negative and DOVAR
has INTEGER_TYPE, Fortran generates unnecessarily complicated code.
    if (D.1533 > 0) goto <D.1542>; else goto <D.1543>;
    <D.1542>:
    D.1544 = D.1532 - e.2;
    iftmp.1 = (character(kind=4)) D.1544;
    goto <D.1545>;
    <D.1543>:
    D.1546 = e.2 - D.1532;
    iftmp.1 = (character(kind=4)) D.1546;
    <D.1545>:
    D.1547 = ABS_EXPR <D.1533>;
    D.1548 = (character(kind=4)) D.1547;
    countm1.0 = iftmp.1 / D.1548;
    e = e.2;
    if (D.1533 > 0) goto <D.1550>; else goto <D.1551>;
    <D.1550>:
    iftmp.3 = D.1532 < e.2;
    goto <D.1552>;
    <D.1551>:
    iftmp.3 = D.1532 > e.2;
    <D.1552>:
    if (iftmp.3 != 0) goto L.2; else goto <D.1553>;
    <D.1553>:
    <D.1554>:
    loop... branch to L.2 when done
    <goto D.1554>;
    <L.2>:
which e.g. leads on most arches into storing a comparison flag
into a variable and then comparing it, which is on many arches costly.
With the patch below, we generate instead:
    e = e.1;
    if (D.1533 > 0) goto <D.1541>; else goto <D.1542>;
    <D.1541>:
    if (D.1532 < e.1) goto L.2; else goto <D.1543>;
    <D.1543>:
    D.1544 = D.1532 - e.1;
    D.1545 = (character(kind=4)) D.1544;
    D.1546 = (character(kind=4)) D.1533;
    countm1.0 = D.1545 / D.1546;
    goto <D.1547>;
    <D.1542>:
    if (D.1532 > e.1) goto L.2; else goto <D.1548>;
    <D.1548>:
    D.1549 = e.1 - D.1532;
    D.1550 = (character(kind=4)) D.1549;
    D.1551 = -D.1533;
    D.1552 = (character(kind=4)) D.1551;
    countm1.0 = D.1550 / D.1552;
    <D.1547>:
    <D.1553>:
    loop... branch to L.2 when done
    <goto <D.1553>;
    <L.2>:
i.e. we merge the 3 conditionals with step > 0 condition (to - from vs.
from - to, ABS_EXPR (step) and the empty loop condition)
into one bigger one.

Bootstrapped/regtested on x86_64-linux, on the given testcase smaller code
is generated on x86_64-linux at least.
In the PR Luis said the patch improved slightly swim on ppc32 and art on
ppc64.

Ok for trunk?

2008-09-16  Jakub Jelinek  <jakub@redhat.com>

	PR fortran/37536
	* trans-stmt.c (gfc_trans_do): Optimize integer type non-simple
	do loop initialization.

--- gcc/fortran/trans-stmt.c.jj	2008-09-05 12:55:53.000000000 +0200
+++ gcc/fortran/trans-stmt.c	2008-09-16 14:40:00.000000000 +0200
@@ -834,7 +834,6 @@ gfc_trans_do (gfc_code * code)
   tree from;
   tree to;
   tree step;
-  tree empty;
   tree countm1;
   tree type;
   tree utype;
@@ -875,56 +874,88 @@ gfc_trans_do (gfc_code * code)
       && (integer_onep (step)
 	|| tree_int_cst_equal (step, integer_minus_one_node)))
     return gfc_trans_simple_do (code, &block, dovar, from, to, step);
-      
-  /* We need a special check for empty loops:
-     empty = (step > 0 ? to < from : to > from);  */
+
   pos_step = fold_build2 (GT_EXPR, boolean_type_node, step,
 			  fold_convert (type, integer_zero_node));
-  empty = fold_build3 (COND_EXPR, boolean_type_node, pos_step,
-		       fold_build2 (LT_EXPR, boolean_type_node, to, from),
-		       fold_build2 (GT_EXPR, boolean_type_node, to, from));
 
-  /* Initialize loop count. This code is executed before we enter the
-     loop body. We generate: countm1 = abs(to - from) / abs(step).  */
+  if (TREE_CODE (type) == INTEGER_TYPE)
+    utype = unsigned_type_for (type);
+  else
+    utype = unsigned_type_for (gfc_array_index_type);
+  countm1 = gfc_create_var (utype, "countm1");
+
+  /* Cycle and exit statements are implemented with gotos.  */
+  cycle_label = gfc_build_label_decl (NULL_TREE);
+  exit_label = gfc_build_label_decl (NULL_TREE);
+  TREE_USED (exit_label) = 1;
+
+  /* Initialize the DO variable: dovar = from.  */
+  gfc_add_modify (&block, dovar, from);
+
+  /* Initialize loop count and jump to exit label if the loop is empty.
+     This code is executed before we enter the loop body. We generate:
+     if (step > 0)
+       {
+	 if (to < from) goto exit_label;
+	 countm1 = to - from / step;
+       }
+     else
+       {
+	 if (to > from) goto exit_label;
+	 countm1 = from - to / -step;
+       }  */
   if (TREE_CODE (type) == INTEGER_TYPE)
     {
-      tree ustep;
+      tree pos, neg;
 
-      utype = unsigned_type_for (type);
+      tmp = fold_build2 (LT_EXPR, boolean_type_node, to, from);
+      pos = fold_build3 (COND_EXPR, void_type_node, tmp,
+			 build1_v (GOTO_EXPR, exit_label),
+			 build_empty_stmt ());
+      tmp = fold_build2 (MINUS_EXPR, type, to, from);
+      tmp = fold_convert (utype, tmp);
+      tmp = fold_build2 (TRUNC_DIV_EXPR, utype, tmp,
+			 fold_convert (utype, step));
+      tmp = build2 (MODIFY_EXPR, void_type_node, countm1, tmp);
+      pos = build2 (COMPOUND_EXPR, void_type_node, pos, tmp);
+
+      tmp = fold_build2 (GT_EXPR, boolean_type_node, to, from);
+      neg = fold_build3 (COND_EXPR, void_type_node, tmp,
+			 build1_v (GOTO_EXPR, exit_label),
+			 build_empty_stmt ());
+      tmp = fold_build2 (MINUS_EXPR, type, from, to);
+      tmp = fold_convert (utype, tmp);
+      tmp = fold_build2 (TRUNC_DIV_EXPR, utype, tmp,
+			 fold_convert (utype, fold_build1 (NEGATE_EXPR,
+							   type, step)));
+      tmp = build2 (MODIFY_EXPR, void_type_node, countm1, tmp);
+      neg = build2 (COMPOUND_EXPR, void_type_node, neg, tmp);
 
-      /* tmp = abs(to - from) / abs(step) */
-      ustep = fold_convert (utype, fold_build1 (ABS_EXPR, type, step));
-      tmp = fold_build3 (COND_EXPR, type, pos_step,
-			 fold_build2 (MINUS_EXPR, type, to, from),
-			 fold_build2 (MINUS_EXPR, type, from, to));
-      tmp = fold_build2 (TRUNC_DIV_EXPR, utype, fold_convert (utype, tmp),
-			 ustep);
+      tmp = fold_build3 (COND_EXPR, void_type_node, pos_step, pos, neg);
+      gfc_add_expr_to_block (&block, tmp);
     }
   else
     {
       /* TODO: We could use the same width as the real type.
 	 This would probably cause more problems that it solves
 	 when we implement "long double" types.  */
-      utype = unsigned_type_for (gfc_array_index_type);
+
       tmp = fold_build2 (MINUS_EXPR, type, to, from);
       tmp = fold_build2 (RDIV_EXPR, type, tmp, step);
       tmp = fold_build1 (FIX_TRUNC_EXPR, utype, tmp);
-    }
-  countm1 = gfc_create_var (utype, "countm1");
-  gfc_add_modify (&block, countm1, tmp);
-
-  /* Cycle and exit statements are implemented with gotos.  */
-  cycle_label = gfc_build_label_decl (NULL_TREE);
-  exit_label = gfc_build_label_decl (NULL_TREE);
-  TREE_USED (exit_label) = 1;
-
-  /* Initialize the DO variable: dovar = from.  */
-  gfc_add_modify (&block, dovar, from);
+      gfc_add_modify (&block, countm1, tmp);
 
-  /* If the loop is empty, go directly to the exit label.  */
-  tmp = fold_build3 (COND_EXPR, void_type_node, empty,
-		     build1_v (GOTO_EXPR, exit_label), build_empty_stmt ());
-  gfc_add_expr_to_block (&block, tmp);
+      /* We need a special check for empty loops:
+	 empty = (step > 0 ? to < from : to > from);  */
+      tmp = fold_build3 (COND_EXPR, boolean_type_node, pos_step,
+			 fold_build2 (LT_EXPR, boolean_type_node, to, from),
+			 fold_build2 (GT_EXPR, boolean_type_node, to, from));
+      /* If the loop is empty, go directly to the exit label.  */
+      tmp = fold_build3 (COND_EXPR, void_type_node, tmp,
+			 build1_v (GOTO_EXPR, exit_label),
+			 build_empty_stmt ());
+      gfc_add_expr_to_block (&block, tmp);
+    }
 
   /* Loop body.  */
   gfc_start_block (&body);

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]