This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[patch] [4.1] Fix PR20256, PR25211 and PR26435 linear transform

From: "Sebastian Pop" <sebpop at gmail dot com>
To: gcc-patches at gcc dot gnu dot org, "Daniel Berlin" <dberlin at dberlin dot org>, "Sebastian Pop" <sebastian dot pop at cri dot ensmp dot fr>
Date: Tue, 8 Aug 2006 15:51:47 +0200
Subject: [patch] [4.1] Fix PR20256, PR25211 and PR26435 linear transform

Hi,

Here is a backport of the patches that we have in mainline for correcting
all these bugs.  The following patch was bootstrapped and tested on amd64
on the 4.1 branch.  Okay for 4.1 branch?

Sebastian

2006-08-08  Sebastian Pop  <pop@cri.ensmp.fr>

	PR middle-end/25211
	PR middle-end/20256
	PR middle-end/26435
	* tree-loop-linear.c (linear_transform_loops): Don't test perfect_nest_p.
	Call rewrite_into_loop_closed_ssa only when something changed.
	* lambda.h (gcc_loopnest_to_lambda_loopnest): Update declaration.
	* lambda-code.c (can_convert_to_perfect_nest): Declared.
	(gcc_loopnest_to_lambda_loopnest): Removed need_perfect_nest parameter.
	Test for perfect_nest_p here.  Fix formating.
	(replace_uses_equiv_to_x_with_y): Fix formating.
	(stmt_uses_op): Removed.
	(can_convert_to_perfect_nest): Removed loopivs parameter.
	Complete the test by checking the scalar dependences.
	(perfect_nestify): Remove the test for can_convert_to_perfect_nest.
	Fix formating.  Don't copy statements in the inner loop: move them to
	the inner loop header.


Index: tree-loop-linear.c
===================================================================
--- tree-loop-linear.c	(revision 116012)
+++ tree-loop-linear.c	(working copy)
@@ -1,5 +1,5 @@
 /* Linear Loop transforms
-   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
    Contributed by Daniel Berlin <dberlin@dberlin.org>.
 
 This file is part of GCC.
@@ -243,6 +243,7 @@ try_interchange_loops (lambda_trans_matr
 void
 linear_transform_loops (struct loops *loops)
 {
+  bool modified = false;
   unsigned int i;
   VEC(tree,heap) *oldivs = NULL;
   VEC(tree,heap) *invariants = NULL;
@@ -257,7 +258,6 @@ linear_transform_loops (struct loops *lo
       lambda_loopnest before, after;
       lambda_trans_matrix trans;
       bool problem = false;
-      bool need_perfect_nest = false;
       /* If it's not a loop nest, we don't want it.
          We also don't handle sibling loops properly, 
          which are loops of the following form:
@@ -334,12 +334,9 @@ linear_transform_loops (struct loops *lo
 	    fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
 	  continue;
 	}
-      if (!perfect_nest_p (loop_nest))
-	need_perfect_nest = true;
-      before = gcc_loopnest_to_lambda_loopnest (loops,
-						loop_nest, &oldivs, 
-						&invariants,
-						need_perfect_nest);
+
+      before = gcc_loopnest_to_lambda_loopnest (loops, loop_nest, &oldivs, 
+						&invariants);
       if (!before)
 	continue;
             
@@ -357,6 +354,8 @@ linear_transform_loops (struct loops *lo
 	}
       lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
 				       after, trans);
+      modified = true;
+
       if (dump_file)
 	fprintf (dump_file, "Successfully transformed loop.\n");
       free_dependence_relations (dependence_relations);
@@ -365,5 +364,7 @@ linear_transform_loops (struct loops *lo
   VEC_free (tree, heap, oldivs);
   VEC_free (tree, heap, invariants);
   scev_reset ();
-  rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);
+
+  if (modified)
+    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);
 }
Index: testsuite/gcc.dg/tree-ssa/ltrans-1.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/ltrans-1.c	(revision 116012)
+++ testsuite/gcc.dg/tree-ssa/ltrans-1.c	(working copy)
@@ -17,7 +17,6 @@ int foo(int N, int *res)
     }
   *res = sum + N;
 }
-/* { dg-final { scan-tree-dump-times "converted loop nest to perfect
-   loop nest" 1 "ltrans"} } */ 
+/* { dg-final { scan-tree-dump-times "converted loop nest to perfect loop nest" 1 "ltrans"} } */ 
 /* { dg-final { scan-tree-dump-times "transformed loop" 1 "ltrans"} } */ 
 /* { dg-final { cleanup-tree-dump "ltrans" } } */
Index: testsuite/gcc.dg/tree-ssa/pr26435.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/pr26435.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/pr26435.c	(revision 0)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */ 
+/* { dg-options "-O2 -ftree-loop-linear -fdump-tree-ltrans-all" } */
+
+int foo(int *p, int n)
+{
+  int i, j, k = 0;
+
+  /* This is a reduction: there is a scalar dependence that cannot be
+     removed by rewriting IVs.  This code cannot and should not be
+     transformed into a perfect loop.  */
+  for (i = 0; i < 2; ++i, p += n)
+    for (j = 0; j < 2; ++j)
+      k += p[j];
+
+  return k;
+}
+
+/* { dg-final { scan-tree-dump-times "converted loop nest to perfect loop nest" 0 "ltrans"} } */ 
+/* { dg-final { cleanup-tree-dump "ltrans" } } */
Index: testsuite/gcc.dg/tree-ssa/pr20256.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/pr20256.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/pr20256.c	(revision 0)
@@ -0,0 +1,24 @@
+/* { dg-do compile } */ 
+/* { dg-options "-O2 -ftree-loop-linear -fdump-tree-ltrans-all" } */
+
+int foo()
+{
+  int x[2][2], y[2];
+  int i, n, s;
+
+  /* This is a reduction: there is a scalar dependence that cannot be
+     removed by rewriting IVs.  This code cannot and should not be
+     transformed into a perfect loop.  */
+  for (n = 0; n < 2; n++)
+    {
+      s = 0;
+      for (i = 0; i < 2; i++)
+        s += x[n][i]*y[i];
+      s += 1;
+    }
+
+  return s;
+}
+
+/* { dg-final { scan-tree-dump-times "converted loop nest to perfect loop nest" 0 "ltrans"} } */ 
+/* { dg-final { cleanup-tree-dump "ltrans" } } */
Index: testsuite/gfortran.dg/pr27745.f90
===================================================================
--- testsuite/gfortran.dg/pr27745.f90	(revision 0)
+++ testsuite/gfortran.dg/pr27745.f90	(revision 0)
@@ -0,0 +1,15 @@
+! { dg-do compile }
+! { dg-options "-O2 -ftree-loop-linear" }
+
+SUBROUTINE BUG
+  INTEGER I, J, M
+  REAL V
+  COMMON  A(100,100), B(100,100), M, V
+  DO 200 I = 1, M
+     DO 100 J = 1, M
+        V = V + A(I,J)
+100     CONTINUE
+        B(I,I) = B(I,I) * I
+200     CONTINUE
+        STOP
+     END
Index: lambda.h
===================================================================
--- lambda.h	(revision 116012)
+++ lambda.h	(working copy)
@@ -1,5 +1,5 @@
 /* Lambda matrix and vector interface.
-   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
    Contributed by Daniel Berlin <dberlin@dberlin.org>
 
 This file is part of GCC.
@@ -200,8 +200,7 @@ void print_lambda_body_vector (FILE *, l
 lambda_loopnest gcc_loopnest_to_lambda_loopnest (struct loops *,
 						 struct loop *,
 						 VEC(tree,heap) **,
-						 VEC(tree,heap) **,
-						 bool);
+						 VEC(tree,heap) **);
 void lambda_loopnest_to_gcc_loopnest (struct loop *,
 				      VEC(tree,heap) *, VEC(tree,heap) *,
 				      lambda_loopnest, lambda_trans_matrix);
Index: lambda-code.c
===================================================================
--- lambda-code.c	(revision 116012)
+++ lambda-code.c	(working copy)
@@ -1,5 +1,5 @@
 /*  Loop transformation code generation
-    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+    Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
     Contributed by Daniel Berlin <dberlin@dberlin.org>
 
     This file is part of GCC.
@@ -149,6 +149,7 @@ static lambda_lattice lambda_lattice_new
 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
 
 static tree find_induction_var_from_exit_cond (struct loop *);
+static bool can_convert_to_perfect_nest (struct loop *);
 
 /* Create a new lambda body vector.  */
 
@@ -1498,14 +1499,13 @@ DEF_VEC_ALLOC_P(lambda_loop,heap);
 
 lambda_loopnest
 gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
-				 struct loop * loop_nest,
+				 struct loop *loop_nest,
 				 VEC(tree,heap) **inductionvars,
-				 VEC(tree,heap) **invariants,
-				 bool need_perfect_nest)
+				 VEC(tree,heap) **invariants)
 {
   lambda_loopnest ret = NULL;
-  struct loop *temp;
-  int depth = 0;
+  struct loop *temp = loop_nest;
+  int depth = depth_of_nest (loop_nest);
   size_t i;
   VEC(lambda_loop,heap) *loops = NULL;
   VEC(tree,heap) *uboundvars = NULL;
@@ -1513,9 +1513,11 @@ gcc_loopnest_to_lambda_loopnest (struct 
   VEC(int,heap) *steps = NULL;
   lambda_loop newloop;
   tree inductionvar = NULL;
-  
-  depth = depth_of_nest (loop_nest);
-  temp = loop_nest;
+  bool perfect_nest = perfect_nest_p (loop_nest);
+
+  if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
+    goto fail;
+
   while (temp)
     {
       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
@@ -1523,12 +1525,14 @@ gcc_loopnest_to_lambda_loopnest (struct 
 					 &lboundvars, &uboundvars,
 					 &steps);
       if (!newloop)
-	return NULL;
+	goto fail;
+
       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
       VEC_safe_push (lambda_loop, heap, loops, newloop);
       temp = temp->inner;
     }
-  if (need_perfect_nest)
+
+  if (!perfect_nest)
     {
       if (!perfect_nestify (currloops, loop_nest, 
 			    lboundvars, uboundvars, steps, *inductionvars))
@@ -1542,9 +1546,12 @@ gcc_loopnest_to_lambda_loopnest (struct 
 	fprintf (dump_file,
 		 "Successfully converted loop nest to perfect loop nest.\n");
     }
+
   ret = lambda_loopnest_new (depth, 2 * depth);
+
   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
     LN_LOOPS (ret)[i] = newloop;
+
  fail:
   VEC_free (lambda_loop, heap, loops);
   VEC_free (tree, heap, uboundvars);
@@ -2156,13 +2163,12 @@ replace_uses_equiv_to_x_with_y (struct l
     {
       tree use = USE_FROM_PTR (use_p);
       tree step = NULL_TREE;
-      tree access_fn = NULL_TREE;
-      
-      
-      access_fn = instantiate_parameters
-	(loop, analyze_scalar_evolution (loop, use));
-      if (access_fn != NULL_TREE && access_fn != chrec_dont_know)
-	step = evolution_part_in_loop_num (access_fn, loop->num);
+      tree scev = instantiate_parameters (loop,
+					  analyze_scalar_evolution (loop, use));
+
+      if (scev != NULL_TREE && scev != chrec_dont_know)
+	step = evolution_part_in_loop_num (scev, loop->num);
+
       if ((step && step != chrec_dont_know 
 	   && TREE_CODE (step) == INTEGER_CST
 	   && int_cst_value (step) == xstep)
@@ -2171,22 +2177,6 @@ replace_uses_equiv_to_x_with_y (struct l
     }
 }
 
-/* Return TRUE if STMT uses tree OP in it's uses.  */
-
-static bool
-stmt_uses_op (tree stmt, tree op)
-{
-  ssa_op_iter iter;
-  tree use;
-
-  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
-    {
-      if (use == op)
-	return true;
-    }
-  return false;
-}
-
 /* Return true if STMT is an exit PHI for LOOP */
 
 static bool
@@ -2236,15 +2226,39 @@ can_put_in_inner_loop (struct loop *inne
   
 }
 
+/* Return true if STMT can be put *after* the inner loop of LOOP.  */
 
-/* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
-   one.  LOOPIVS is a vector of induction variables, one per loop.  
-   ATM, we only handle imperfect nests of depth 2, where all of the statements
-   occur after the inner loop.  */
+static bool
+can_put_after_inner_loop (struct loop *loop, tree stmt)
+{
+  imm_use_iterator imm_iter;
+  use_operand_p use_p;
+
+  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
+    return false;
+  
+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
+    {
+      if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
+	{
+	  basic_block immbb = bb_for_stmt (USE_STMT (use_p));
+	  
+	  if (!dominated_by_p (CDI_DOMINATORS,
+			       immbb,
+			       loop->inner->header)
+	      && !can_put_in_inner_loop (loop->inner, stmt))
+	    return false;
+	}
+    }
+  return true;
+}
+
+/* Return TRUE if LOOP is an imperfect nest that we can convert to a
+   perfect one.  At the moment, we only handle imperfect nests of
+   depth 2, where all of the statements occur after the inner loop.  */
 
 static bool
-can_convert_to_perfect_nest (struct loop *loop,
-			     VEC(tree,heap) *loopivs)
+can_convert_to_perfect_nest (struct loop *loop)
 {
   basic_block *bbs;
   tree exit_condition, phi;
@@ -2264,19 +2278,13 @@ can_convert_to_perfect_nest (struct loop
 	{
 	  for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
 	    { 
-	      size_t j;
 	      tree stmt = bsi_stmt (bsi);
-	      tree iv;
-	      
+
 	      if (stmt == exit_condition
 		  || not_interesting_stmt (stmt)
 		  || stmt_is_bumper_for_loop (loop, stmt))
 		continue;
-	      /* If the statement uses inner loop ivs, we == screwed.  */
-	      for (j = 1; VEC_iterate (tree, loopivs, j, iv); j++)
-		if (stmt_uses_op (stmt, iv))
-		  goto fail;
-	      
+
 	      /* If this is a simple operation like a cast that is invariant
 		 in the inner loop, only used there, and we can place it
 		 there, then it's not going to hurt us.
@@ -2286,10 +2294,65 @@ can_convert_to_perfect_nest (struct loop
 		 theory that we are going to gain a lot more by interchanging
 		 the loop than we are by leaving some invariant code there for
 		 some other pass to clean up.  */
-	      if (TREE_CODE (stmt) == MODIFY_EXPR
-		  && is_gimple_cast (TREE_OPERAND (stmt, 1))
-		  && can_put_in_inner_loop (loop->inner, stmt))
-		continue;
+	      if (TREE_CODE (stmt) == MODIFY_EXPR)
+		{
+		  use_operand_p use_a, use_b;
+		  imm_use_iterator imm_iter;
+		  ssa_op_iter op_iter, op_iter1;
+		  tree op0 = TREE_OPERAND (stmt, 0);
+		  tree scev = instantiate_parameters
+		    (loop, analyze_scalar_evolution (loop, op0));
+
+		  /* If the IV is simple, it can be duplicated.  */
+		  if (!automatically_generated_chrec_p (scev))
+		    {
+		      tree step = evolution_part_in_loop_num (scev, loop->num);
+		      if (step && step != chrec_dont_know 
+			  && TREE_CODE (step) == INTEGER_CST)
+			continue;
+		    }
+
+		  /* The statement should not define a variable used
+		     in the inner loop.  */
+		  if (TREE_CODE (op0) == SSA_NAME)
+		    FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
+		      if (bb_for_stmt (USE_STMT (use_a))->loop_father
+			  == loop->inner)
+			goto fail;
+
+		  FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
+		    {
+		      tree node, op = USE_FROM_PTR (use_a);
+
+		      /* The variables should not be used in both loops.  */
+		      FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
+		      if (bb_for_stmt (USE_STMT (use_b))->loop_father
+			  == loop->inner)
+			goto fail;
+
+		      /* The statement should not use the value of a
+			 scalar that was modified in the loop.  */
+		      node = SSA_NAME_DEF_STMT (op);
+		      if (TREE_CODE (node) == PHI_NODE)
+			FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
+			  {
+			    tree arg = USE_FROM_PTR (use_b);
+
+			    if (TREE_CODE (arg) == SSA_NAME)
+			      {
+				tree arg_stmt = SSA_NAME_DEF_STMT (arg);
+
+				if (bb_for_stmt (arg_stmt)->loop_father
+				    == loop->inner)
+				  goto fail;
+			      }
+			  }
+		    }
+
+		  if (can_put_in_inner_loop (loop->inner, stmt)
+		      || can_put_after_inner_loop (loop, stmt))
+		    continue;
+		}
 
 	      /* Otherwise, if the bb of a statement we care about isn't
 		 dominated by the header of the inner loop, then we can't
@@ -2379,14 +2442,10 @@ perfect_nestify (struct loops *loops,
   tree stmt;
   tree oldivvar, ivvar, ivvarinced;
   VEC(tree,heap) *phis = NULL;
-
-  if (!can_convert_to_perfect_nest (loop, loopivs))
-    return false;
-
-  /* Create the new loop */
-
+  
+  /* Create the new loop.  */
   olddest = loop->single_exit->dest;
-  preheaderbb =  loop_split_edge_with (loop->single_exit, NULL);
+  preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
   
   /* Push the exit phi nodes that we are moving.  */
@@ -2501,37 +2560,22 @@ perfect_nestify (struct loops *loops,
 
 	  if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
 	    {
-	      for (bsi = bsi_last (bbs[i]); !bsi_end_p (bsi);)
+	      block_stmt_iterator header_bsi 
+		= bsi_after_labels (loop->inner->header);
+
+	      for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
 		{ 
-		  use_operand_p use_p;
-		  imm_use_iterator imm_iter;
 		  tree stmt = bsi_stmt (bsi);
 
 		  if (stmt == exit_condition
 		      || not_interesting_stmt (stmt)
 		      || stmt_is_bumper_for_loop (loop, stmt))
 		    {
-		      if (!bsi_end_p (bsi))
-			bsi_prev (&bsi);
+		      bsi_next (&bsi);
 		      continue;
 		    }
-		  /* Move this statement back into the inner loop.
-		     This looks a bit confusing, but we are really just
-		     finding the first non-exit phi use and moving the
-		     statement to the beginning of that use's basic
-		     block.  */
-		  FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, 
-					 TREE_OPERAND (stmt, 0))
-		    {
-		      tree imm_stmt = USE_STMT (use_p);
-		      if (!exit_phi_for_loop_p (loop->inner, imm_stmt))
-			{
-			  block_stmt_iterator tobsi = bsi_after_labels (bb_for_stmt (imm_stmt));
-			  bsi_move_before (&bsi, &tobsi);
-			  update_stmt (stmt);
-			  BREAK_FROM_SAFE_IMM_USE (imm_iter);
-			} 
-		    }
+
+		  bsi_move_before (&bsi, &header_bsi);
 		}
 	    }
 	  else
@@ -2552,10 +2596,9 @@ perfect_nestify (struct loops *loops,
 		      continue;
 		    }
 		  
-		  replace_uses_equiv_to_x_with_y (loop, stmt, 
-						  oldivvar,  
-						  VEC_index (int, steps, 0),
-						  ivvar);
+		  replace_uses_equiv_to_x_with_y 
+		    (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar);
+
 		  bsi_move_before (&bsi, &tobsi);
 		  
 		  /* If the statement has any virtual operands, they may

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]