This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[patch] Fix PR26435 loop distribution in lambda-code

From: Sebastian Pop <sebastian dot pop at cri dot ensmp dot fr>
To: GCC Patches <gcc-patches at gcc dot gnu dot org>
Date: Mon, 15 May 2006 17:55:24 +0200
Subject: [patch] Fix PR26435 loop distribution in lambda-code
Hi,

Here is a patch that makes the loop distribution code more careful
about the scalar dependences: for a loop nest with two loops, it now
avoids to move scalars that are updated in both loops.  This is the
easy solution.  A more complete solution is to include a real pass for
loop distribution: Georges-Andre Silber has worked on that pass and he
will propose a patch to be included in 4.3.  Until then, here is a
patch that can also be ported to the 4.1 and 4.0 branches.

Bootstrapped and tested on amd64-linux.  Okay for trunk?

Sebastian

2006-05-15  Sebastian Pop  <pop@cri.ensmp.fr>

	* tree-loop-linear.c (linear_transform_loops): Don't test perfect_nest_p.
	Call rewrite_into_loop_closed_ssa only when something changed.
	* lambda.h (gcc_loopnest_to_lambda_loopnest): Update declaration.
	* lambda-code.c (can_convert_to_perfect_nest): Declared.
	(gcc_loopnest_to_lambda_loopnest): Removed need_perfect_nest parameter.
	Test for perfect_nest_p here.  Fix formating.
	(replace_uses_equiv_to_x_with_y): Fix formating.
	(stmt_uses_op): Removed.
	(can_convert_to_perfect_nest): Removed loopivs parameter.
	Complete the test by checking the scalar dependences.
	(perfect_nestify): Remove the test for can_convert_to_perfect_nest.
	Fix formating.  

Index: tree-loop-linear.c
===================================================================
*** tree-loop-linear.c	(revision 113724)
--- tree-loop-linear.c	(working copy)
*************** try_interchange_loops (lambda_trans_matr
*** 238,243 ****
--- 238,244 ----
  void
  linear_transform_loops (struct loops *loops)
  {
+   bool modified = false;
    unsigned int i;
    VEC(tree,heap) *oldivs = NULL;
    VEC(tree,heap) *invariants = NULL;
*************** linear_transform_loops (struct loops *lo
*** 252,258 ****
        lambda_loopnest before, after;
        lambda_trans_matrix trans;
        bool problem = false;
-       bool need_perfect_nest = false;
        /* If it's not a loop nest, we don't want it.
           We also don't handle sibling loops properly, 
           which are loops of the following form:
--- 253,258 ----
*************** linear_transform_loops (struct loops *lo
*** 316,328 ****
  	  continue;
  	}
  
!       if (!perfect_nest_p (loop_nest))
! 	need_perfect_nest = true;
  
-       before = gcc_loopnest_to_lambda_loopnest (loops,
- 						loop_nest, &oldivs, 
- 						&invariants,
- 						need_perfect_nest);
        if (!before)
  	continue;
              
--- 316,324 ----
  	  continue;
  	}
  
!       before = gcc_loopnest_to_lambda_loopnest (loops, loop_nest, &oldivs,
! 						&invariants);
  
        if (!before)
  	continue;
              
*************** linear_transform_loops (struct loops *lo
*** 342,347 ****
--- 338,344 ----
  
        lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
  				       after, trans);
+       modified = true;
  
        if (dump_file)
  	fprintf (dump_file, "Successfully transformed loop.\n");
*************** linear_transform_loops (struct loops *lo
*** 353,357 ****
    VEC_free (tree, heap, oldivs);
    VEC_free (tree, heap, invariants);
    scev_reset ();
!   rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);
  }
--- 350,356 ----
    VEC_free (tree, heap, oldivs);
    VEC_free (tree, heap, invariants);
    scev_reset ();
! 
!   if (modified)
!     rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);
  }
Index: testsuite/gcc.dg/tree-ssa/ltrans-1.c
===================================================================
*** testsuite/gcc.dg/tree-ssa/ltrans-1.c	(revision 113724)
--- testsuite/gcc.dg/tree-ssa/ltrans-1.c	(working copy)
*************** int foo(int N, int *res)
*** 18,24 ****
      }
    *res = sum + N;
  }
! /* { dg-final { scan-tree-dump-times "converted loop nest to perfect
!    loop nest" 1 "ltrans"} } */ 
  /* { dg-final { scan-tree-dump-times "transformed loop" 1 "ltrans"} } */ 
  /* { dg-final { cleanup-tree-dump "ltrans" } } */
--- 18,23 ----
      }
    *res = sum + N;
  }
! /* { dg-final { scan-tree-dump-times "converted loop nest to perfect loop nest" 1 "ltrans"} } */ 
  /* { dg-final { scan-tree-dump-times "transformed loop" 1 "ltrans"} } */ 
  /* { dg-final { cleanup-tree-dump "ltrans" } } */
Index: testsuite/gcc.dg/tree-ssa/pr26435.c
===================================================================
*** testsuite/gcc.dg/tree-ssa/pr26435.c	(revision 0)
--- testsuite/gcc.dg/tree-ssa/pr26435.c	(revision 0)
***************
*** 0 ****
--- 1,20 ----
+ /* { dg-do compile } */ 
+ /* { dg-options "-O2 -ftree-loop-linear -fdump-tree-ltrans-all" } */
+ /* { dg-require-effective-target size32plus } */
+ 
+ int foo(int *p, int n)
+ {
+   int i, j, k = 0;
+ 
+   /* This is a reduction: there is a scalar dependence that cannot be
+      removed by rewriting IVs.  This code cannot and should not be
+      transformed into a perfect loop.  */
+   for (i = 0; i < 2; ++i, p += n)
+     for (j = 0; j < 2; ++j)
+       k += p[j];
+ 
+   return k;
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "converted loop nest to perfect loop nest" 0 "ltrans"} } */ 
+ /* { dg-final { cleanup-tree-dump "ltrans" } } */
Index: lambda.h
===================================================================
*** lambda.h	(revision 113724)
--- lambda.h	(working copy)
*************** void print_lambda_body_vector (FILE *, l
*** 199,206 ****
  lambda_loopnest gcc_loopnest_to_lambda_loopnest (struct loops *,
  						 struct loop *,
  						 VEC(tree,heap) **,
! 						 VEC(tree,heap) **,
! 						 bool);
  void lambda_loopnest_to_gcc_loopnest (struct loop *,
  				      VEC(tree,heap) *, VEC(tree,heap) *,
  				      lambda_loopnest, lambda_trans_matrix);
--- 199,205 ----
  lambda_loopnest gcc_loopnest_to_lambda_loopnest (struct loops *,
  						 struct loop *,
  						 VEC(tree,heap) **,
! 						 VEC(tree,heap) **);
  void lambda_loopnest_to_gcc_loopnest (struct loop *,
  				      VEC(tree,heap) *, VEC(tree,heap) *,
  				      lambda_loopnest, lambda_trans_matrix);
Index: lambda-code.c
===================================================================
*** lambda-code.c	(revision 113724)
--- lambda-code.c	(working copy)
*************** static lambda_lattice lambda_lattice_new
*** 147,152 ****
--- 147,153 ----
  static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
  
  static tree find_induction_var_from_exit_cond (struct loop *);
+ static bool can_convert_to_perfect_nest (struct loop *);
  
  /* Create a new lambda body vector.  */
  
*************** DEF_VEC_ALLOC_P(lambda_loop,heap);
*** 1457,1470 ****
  
  lambda_loopnest
  gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
! 				 struct loop * loop_nest,
  				 VEC(tree,heap) **inductionvars,
! 				 VEC(tree,heap) **invariants,
! 				 bool need_perfect_nest)
  {
    lambda_loopnest ret = NULL;
!   struct loop *temp;
!   int depth = 0;
    size_t i;
    VEC(lambda_loop,heap) *loops = NULL;
    VEC(tree,heap) *uboundvars = NULL;
--- 1458,1470 ----
  
  lambda_loopnest
  gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
! 				 struct loop *loop_nest,
  				 VEC(tree,heap) **inductionvars,
! 				 VEC(tree,heap) **invariants)
  {
    lambda_loopnest ret = NULL;
!   struct loop *temp = loop_nest;
!   int depth = depth_of_nest (loop_nest);
    size_t i;
    VEC(lambda_loop,heap) *loops = NULL;
    VEC(tree,heap) *uboundvars = NULL;
*************** gcc_loopnest_to_lambda_loopnest (struct 
*** 1472,1480 ****
    VEC(int,heap) *steps = NULL;
    lambda_loop newloop;
    tree inductionvar = NULL;
!   
!   depth = depth_of_nest (loop_nest);
!   temp = loop_nest;
    while (temp)
      {
        newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
--- 1472,1482 ----
    VEC(int,heap) *steps = NULL;
    lambda_loop newloop;
    tree inductionvar = NULL;
!   bool perfect_nest = perfect_nest_p (loop_nest);
! 
!   if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
!     goto fail;
! 
    while (temp)
      {
        newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
*************** gcc_loopnest_to_lambda_loopnest (struct 
*** 1482,1493 ****
  					 &lboundvars, &uboundvars,
  					 &steps);
        if (!newloop)
! 	return NULL;
        VEC_safe_push (tree, heap, *inductionvars, inductionvar);
        VEC_safe_push (lambda_loop, heap, loops, newloop);
        temp = temp->inner;
      }
!   if (need_perfect_nest)
      {
        if (!perfect_nestify (currloops, loop_nest, 
  			    lboundvars, uboundvars, steps, *inductionvars))
--- 1484,1497 ----
  					 &lboundvars, &uboundvars,
  					 &steps);
        if (!newloop)
! 	goto fail;
! 
        VEC_safe_push (tree, heap, *inductionvars, inductionvar);
        VEC_safe_push (lambda_loop, heap, loops, newloop);
        temp = temp->inner;
      }
! 
!   if (!perfect_nest)
      {
        if (!perfect_nestify (currloops, loop_nest, 
  			    lboundvars, uboundvars, steps, *inductionvars))
*************** gcc_loopnest_to_lambda_loopnest (struct 
*** 1501,1509 ****
--- 1505,1516 ----
  	fprintf (dump_file,
  		 "Successfully converted loop nest to perfect loop nest.\n");
      }
+ 
    ret = lambda_loopnest_new (depth, 2 * depth);
+ 
    for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
      LN_LOOPS (ret)[i] = newloop;
+ 
   fail:
    VEC_free (lambda_loop, heap, loops);
    VEC_free (tree, heap, uboundvars);
*************** replace_uses_equiv_to_x_with_y (struct l
*** 2110,2122 ****
      {
        tree use = USE_FROM_PTR (use_p);
        tree step = NULL_TREE;
!       tree access_fn = NULL_TREE;
!       
!       
!       access_fn = instantiate_parameters
! 	(loop, analyze_scalar_evolution (loop, use));
!       if (access_fn != NULL_TREE && access_fn != chrec_dont_know)
! 	step = evolution_part_in_loop_num (access_fn, loop->num);
        if ((step && step != chrec_dont_know 
  	   && TREE_CODE (step) == INTEGER_CST
  	   && int_cst_value (step) == xstep)
--- 2117,2128 ----
      {
        tree use = USE_FROM_PTR (use_p);
        tree step = NULL_TREE;
!       tree scev = instantiate_parameters (loop,
! 					  analyze_scalar_evolution (loop, use));
! 
!       if (scev != NULL_TREE && scev != chrec_dont_know)
! 	step = evolution_part_in_loop_num (scev, loop->num);
! 
        if ((step && step != chrec_dont_know 
  	   && TREE_CODE (step) == INTEGER_CST
  	   && int_cst_value (step) == xstep)
*************** replace_uses_equiv_to_x_with_y (struct l
*** 2125,2146 ****
      }
  }
  
- /* Return TRUE if STMT uses tree OP in it's uses.  */
- 
- static bool
- stmt_uses_op (tree stmt, tree op)
- {
-   ssa_op_iter iter;
-   tree use;
- 
-   FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
-     {
-       if (use == op)
- 	return true;
-     }
-   return false;
- }
- 
  /* Return true if STMT is an exit PHI for LOOP */
  
  static bool
--- 2131,2136 ----
*************** can_put_after_inner_loop (struct loop *l
*** 2210,2223 ****
  
  
  
! /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
!    one.  LOOPIVS is a vector of induction variables, one per loop.  
!    ATM, we only handle imperfect nests of depth 2, where all of the statements
!    occur after the inner loop.  */
  
  static bool
! can_convert_to_perfect_nest (struct loop *loop,
! 			     VEC(tree,heap) *loopivs)
  {
    basic_block *bbs;
    tree exit_condition, phi;
--- 2200,2211 ----
  
  
  
! /* Return TRUE if LOOP is an imperfect nest that we can convert to a
!    perfect one.  At the moment, we only handle imperfect nests of
!    depth 2, where all of the statements occur after the inner loop.  */
  
  static bool
! can_convert_to_perfect_nest (struct loop *loop)
  {
    basic_block *bbs;
    tree exit_condition, phi;
*************** can_convert_to_perfect_nest (struct loop
*** 2237,2255 ****
  	{
  	  for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
  	    { 
- 	      size_t j;
  	      tree stmt = bsi_stmt (bsi);
! 	      tree iv;
! 	      
  	      if (stmt == exit_condition
  		  || not_interesting_stmt (stmt)
  		  || stmt_is_bumper_for_loop (loop, stmt))
  		continue;
! 	      /* If the statement uses inner loop ivs, we == screwed.  */
! 	      for (j = 1; VEC_iterate (tree, loopivs, j, iv); j++)
! 		if (stmt_uses_op (stmt, iv))
! 		  goto fail;
! 	      
  	      /* If this is a scalar operation that can be put back
  	         into the inner loop, or after the inner loop, through
  		 copying, then do so. This works on the theory that
--- 2225,2237 ----
  	{
  	  for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
  	    { 
  	      tree stmt = bsi_stmt (bsi);
! 
  	      if (stmt == exit_condition
  		  || not_interesting_stmt (stmt)
  		  || stmt_is_bumper_for_loop (loop, stmt))
  		continue;
! 
  	      /* If this is a scalar operation that can be put back
  	         into the inner loop, or after the inner loop, through
  		 copying, then do so. This works on the theory that
*************** can_convert_to_perfect_nest (struct loop
*** 2258,2267 ****
  		 win we get from rearranging the memory walk
  		 the loop is doing so that it has better
  		 cache behavior.  */
! 	      if (TREE_CODE (stmt) == MODIFY_EXPR
! 		  && (can_put_in_inner_loop (loop->inner, stmt)
! 		      || can_put_after_inner_loop (loop, stmt)))
! 		continue;
  
  	      /* Otherwise, if the bb of a statement we care about isn't
  		 dominated by the header of the inner loop, then we can't
--- 2240,2282 ----
  		 win we get from rearranging the memory walk
  		 the loop is doing so that it has better
  		 cache behavior.  */
! 	      if (TREE_CODE (stmt) == MODIFY_EXPR)
! 		{
! 		  use_operand_p use_a, use_b;
! 		  imm_use_iterator imm_iter;
! 		  ssa_op_iter op_iter;
! 		  tree op0 = TREE_OPERAND (stmt, 0);
! 		  tree scev = instantiate_parameters
! 		    (loop, analyze_scalar_evolution (loop, op0));
! 
! 		  /* If the IV is simple, it can be duplicated.  */
! 		  if (!automatically_generated_chrec_p (scev))
! 		    {
! 		      tree step = evolution_part_in_loop_num (scev, loop->num);
! 		      if (step && step != chrec_dont_know 
! 			  && TREE_CODE (step) == INTEGER_CST)
! 			continue;
! 		    }
! 
! 		  /* The statement should not define a variable used
! 		     in the inner loop.  */
! 		  if (TREE_CODE (op0) == SSA_NAME)
! 		    FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
! 		      if (bb_for_stmt (USE_STMT (use_a))->loop_father
! 			  == loop->inner)
! 			goto fail;
! 
! 		  /* The variables should not be used in both loops.  */
! 		  FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
! 		    FOR_EACH_IMM_USE_FAST (use_b, imm_iter, USE_FROM_PTR (use_a))
! 		      if (bb_for_stmt (USE_STMT (use_b))->loop_father
! 			  == loop->inner)
! 			goto fail;
! 
! 		  if (can_put_in_inner_loop (loop->inner, stmt)
! 		      || can_put_after_inner_loop (loop, stmt))
! 		    continue;
! 		}
  
  	      /* Otherwise, if the bb of a statement we care about isn't
  		 dominated by the header of the inner loop, then we can't
*************** perfect_nestify (struct loops *loops,
*** 2351,2364 ****
    tree stmt;
    tree oldivvar, ivvar, ivvarinced;
    VEC(tree,heap) *phis = NULL;
! 
!   if (!can_convert_to_perfect_nest (loop, loopivs))
!     return false;
! 
!   /* Create the new loop */
! 
    olddest = loop->single_exit->dest;
!   preheaderbb =  loop_split_edge_with (loop->single_exit, NULL);
    headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
    
    /* Push the exit phi nodes that we are moving.  */
--- 2366,2375 ----
    tree stmt;
    tree oldivvar, ivvar, ivvarinced;
    VEC(tree,heap) *phis = NULL;
!   
!   /* Create the new loop.  */
    olddest = loop->single_exit->dest;
!   preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
    headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
    
    /* Push the exit phi nodes that we are moving.  */
*************** perfect_nestify (struct loops *loops,
*** 2490,2496 ****
  		    }
  		  
  		  /* Make copies of this statement to put it back next
! 		     to its uses. */
  		  FOR_EACH_IMM_USE_STMT (imm_stmt, imm_iter, 
  					 TREE_OPERAND (stmt, 0))
  		    {
--- 2501,2507 ----
  		    }
  		  
  		  /* Make copies of this statement to put it back next
! 		     to its uses.  */
  		  FOR_EACH_IMM_USE_STMT (imm_stmt, imm_iter, 
  					 TREE_OPERAND (stmt, 0))
  		    {
*************** perfect_nestify (struct loops *loops,
*** 2506,2513 ****
--- 2517,2526 ----
  			  newname = SSA_NAME_VAR (newname);
  			  newname = make_ssa_name (newname, newstmt);
  			  TREE_OPERAND (newstmt, 0) = newname;
+ 
  			  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
  			    SET_USE (use_p, newname);
+ 
  			  bsi_insert_before (&tobsi, newstmt, BSI_SAME_STMT);
  			  update_stmt (newstmt);
  			  update_stmt (imm_stmt);
*************** perfect_nestify (struct loops *loops,
*** 2535,2544 ****
  		      continue;
  		    }
  		  
! 		  replace_uses_equiv_to_x_with_y (loop, stmt, 
! 						  oldivvar,  
! 						  VEC_index (int, steps, 0),
! 						  ivvar);
  		  bsi_move_before (&bsi, &tobsi);
  		  
  		  /* If the statement has any virtual operands, they may
--- 2548,2556 ----
  		      continue;
  		    }
  		  
! 		  replace_uses_equiv_to_x_with_y 
! 		    (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar);
! 
  		  bsi_move_before (&bsi, &tobsi);
  		  
  		  /* If the statement has any virtual operands, they may
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]