This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Non-dominating loop bounds in tree-ssa-loop-niter 2/4


On Tue, 30 Oct 2012, Jan Hubicka wrote:

> Hi,
> this patch implements the second part of planned change - to determine loop bounds
> based by shortest path discovery.  This allows to bound number of iterations
> on loops with bounds in statements that do not dominate the latch.
> 
> I originally planned to implement this as part of maybe_lower_iteration_bound,
> but I found doing it separately is more readable.  While both performs walk of
> the loop body, both do that for different reasons.
> 
> discover_iteration_bound_by_body_walk walks from header to latch, while
> maybe_lower_iteration_bound walks from header to first statement with side
> effects looking if there is exit on the way.
> 
> Both walks can be skipped for many loops, but each with different reasons.
> 
> Bootstrapped/regtested x86_64-linux, OK?
> 
> 	* tree-ssa-loop-niter.c (double_int_cmp, bound_index,
> 	discover_iteration_bound_by_body_walk): New functions.
> 	(discover_iteration_bound_by_body_walk): Use it.
> 
> 	* gcc.dg/tree-ssa/loop-38.c: New testcase.
> 
> Index: tree-ssa-loop-niter.c
> ===================================================================
> --- tree-ssa-loop-niter.c	(revision 192989)
> +++ tree-ssa-loop-niter.c	(working copy)
> @@ -2955,6 +2955,234 @@ gcov_type_to_double_int (gcov_type val)
>    return ret;
>  }
>  
> +/* Compare double ints, callback for qsort.  */
> +
> +int
> +double_int_cmp (const void *p1, const void *p2)
> +{
> +  const double_int *d1 = (const double_int *)p1;
> +  const double_int *d2 = (const double_int *)p2;
> +  if (*d1 == *d2)
> +    return 0;
> +  if (d1->ult (*d2))
> +    return -1;
> +  return 1;
> +}
> +
> +/* Return index of BOUND in BOUNDS array sorted in increasing order.
> +   Lookup by binary search.  */
> +
> +int
> +bound_index (VEC (double_int, heap) *bounds, double_int bound)
> +{
> +  unsigned int end = VEC_length (double_int, bounds);
> +  unsigned int begin = 0;
> +
> +  /* Find a matching index by means of a binary search.  */
> +  while (begin != end)
> +    {
> +      unsigned int middle = (begin + end) / 2;
> +      double_int index = VEC_index (double_int, bounds, middle);
> +
> +      if (index == bound)
> +	return middle;
> +      else if (index.ult (bound))
> +	begin = middle + 1;
> +      else
> +	end = middle;
> +    }
> +  gcc_unreachable ();
> +}
> +
> +/* Used to hold vector of queues of basic blocks bellow.  */
> +typedef VEC (basic_block, heap) *bb_queue;
> +DEF_VEC_P(bb_queue);
> +DEF_VEC_ALLOC_P(bb_queue,heap);
> +
> +/* We recorded loop bounds only for statements dominating loop latch (and thus
> +   executed each loop iteration).  If there are any bounds on statements not
> +   dominating the loop latch we can improve the estimate by walking the loop
> +   body and seeing if every path from loop header to loop latch contains
> +   some bounded statement.  */
> +
> +static void
> +discover_iteration_bound_by_body_walk (struct loop *loop)
> +{
> +  pointer_map_t *bb_bounds;
> +  struct nb_iter_bound *elt;
> +  VEC (double_int, heap) *bounds = NULL;
> +  VEC (bb_queue, heap) *queues = NULL;
> +  bb_queue queue = NULL;
> +  ptrdiff_t queue_index;
> +  ptrdiff_t latch_index = 0;
> +  pointer_map_t *visited;
> +
> +  /* Discover what bounds may interest us.  */
> +  for (elt = loop->bounds; elt; elt = elt->next)
> +    {
> +      double_int bound = elt->bound;
> +
> +      /* Exit terminates loop at given iteration, while non-exits produce undefined
> +	 effect on the next iteration.  */
> +      if (!elt->is_exit)
> +	{
> +	  bound += double_int_one;
> +	  /* Overflow, give up on this bound.  */
> +	  if (bound == double_int_zero)
> +	    continue;
> +	}
> +
> +      if (!loop->any_upper_bound
> +	  || bound.ult (loop->nb_iterations_upper_bound))
> +        VEC_safe_push (double_int, heap, bounds, bound);
> +    }
> +
> +  /* Exit early if there is nothing to do.  */
> +  if (!bounds)
> +    return;
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    fprintf (dump_file, " Trying to walk loop body to reduce the bound.\n");
> +
> +  /* Sort the bounds in decreasing order.  */
> +  qsort (VEC_address (double_int, bounds), VEC_length (double_int, bounds),
> +	 sizeof (double_int), double_int_cmp);
> +
> +  /* For every basic block record the lowest bound that is guaranteed to
> +     terminate the loop.  */
> +
> +  bb_bounds = pointer_map_create ();
> +  for (elt = loop->bounds; elt; elt = elt->next)
> +    {
> +      double_int bound = elt->bound;
> +      if (!elt->is_exit)
> +	{
> +	  bound += double_int_one;
> +	  /* Overflow, give up on this bound.  */
> +	  if (bound == double_int_zero)
> +	    continue;
> +	}
> +
> +      if (!loop->any_upper_bound
> +	  || bound.ult (loop->nb_iterations_upper_bound))
> +	{
> +	  ptrdiff_t index = bound_index (bounds, bound);
> +	  void **entry = pointer_map_contains (bb_bounds,
> +					       gimple_bb (elt->stmt));
> +	  if (!entry)
> +	    *pointer_map_insert (bb_bounds,
> +				 gimple_bb (elt->stmt)) = (void *)index;
> +	  else if ((ptrdiff_t)*entry > index)
> +	    *entry = (void *)index;
> +	}
> +    }
> +
> +  visited = pointer_map_create ();

visited is a poor name for a map ...

Otherwise looks ok.

Thanks,
Richard.

> +
> +  /* Perform shortest path discovery loop->header ... loop->latch.
> +
> +     The "distance" is given by the smallest loop bound of basic block
> +     present in the path and we look for path with largest smallest bound
> +     on it.
> +
> +     To avoid the need for fibonaci heap on double ints we simply compress
> +     double ints into indexes to BOUNDS array and then represent the queue
> +     as arrays of queues for every index.
> +     Index of VEC_length (BOUNDS) means that the execution of given BB has
> +     no bounds determined.
> +
> +     VISITED is a pointer map translating basic block into smallest index
> +     it was inserted into the priority queue with.  */
> +  latch_index = -1;
> +
> +  /* Start walk in loop header with index set to infinite bound.  */
> +  queue_index = VEC_length (double_int, bounds);
> +  VEC_safe_grow_cleared (bb_queue, heap, queues, queue_index + 1);
> +  VEC_safe_push (basic_block, heap, queue, loop->header);
> +  VEC_replace (bb_queue, queues, queue_index, queue);
> +  *pointer_map_insert (visited, loop->header) = (void *)queue_index;
> +
> +  for (; queue_index >= 0; queue_index--)
> +    {
> +      if (latch_index < queue_index)
> +	{
> +	  while (VEC_length (basic_block,
> +			     VEC_index (bb_queue, queues, queue_index)))
> +	    {
> +	      basic_block bb;
> +	      ptrdiff_t bound_index = queue_index;
> +	      void **entry;
> +              edge e;
> +              edge_iterator ei;
> +
> +	      queue = VEC_index (bb_queue, queues, queue_index);
> +	      bb = VEC_pop (basic_block, queue);
> +
> +	      /* OK, we later increased BB priority, skip it.  */
> +	      if ((ptrdiff_t)*pointer_map_contains (visited, bb) > queue_index)
> +		continue;
> +
> +	      /* See if we can improve the bound.  */
> +	      entry = pointer_map_contains (bb_bounds, bb);
> +	      if (entry && (ptrdiff_t)*entry < bound_index)
> +		bound_index = (ptrdiff_t)*entry;
> +
> +	      /* Insert succesors into the queue, watch for latch edge
> +		 and record greatest index we saw.  */
> +	      FOR_EACH_EDGE (e, ei, bb->succs)
> +		{
> +		  bool insert = false;
> +		  void **entry;
> +
> +		  if (loop_exit_edge_p (loop, e))
> +		    continue;
> +
> +		  if (e == loop_latch_edge (loop)
> +		      && latch_index < bound_index)
> +		    latch_index = bound_index;
> +		  else if (!(entry = pointer_map_contains (visited, e->dest)))
> +		    {
> +		      insert = true;
> +		      *pointer_map_insert (visited, e->dest) = (void *)bound_index;
> +		    }
> +		  else if ((ptrdiff_t)*entry < bound_index)
> +		    {
> +		      insert = true;
> +		      *entry = (void *)bound_index;
> +		    }
> +		    
> +		  if (insert)
> +		    {
> +		      bb_queue queue2 = VEC_index (bb_queue, queues, bound_index);
> +		      VEC_safe_push (basic_block, heap, queue2, e->dest);
> +		      VEC_replace (bb_queue, queues, bound_index, queue2);
> +		    }
> +		}
> +	    }
> +	}
> +      else
> +	VEC_free (basic_block, heap, VEC_index (bb_queue, queues, queue_index));
> +    }
> +
> +  gcc_assert (latch_index >= 0);
> +  if (latch_index < VEC_length (double_int, bounds))
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +	{
> +	  fprintf (dump_file, "Found better loop bound ");
> +	  dump_double_int (dump_file,
> +			   VEC_index (double_int, bounds, latch_index), true);
> +	  fprintf (dump_file, "\n");
> +	}
> +      record_niter_bound (loop, VEC_index (double_int, bounds, latch_index),
> +			  false, true);
> +    }
> +
> +  VEC_free (bb_queue, heap, queues);
> +  pointer_map_destroy (bb_bounds);
> +  pointer_map_destroy (visited);
> +}
> +
>  /* See if every path cross the loop goes through a statement that is known
>     to not execute at the last iteration. In that case we can decrese iteration
>     count by 1.  */
> @@ -3102,6 +3330,8 @@ estimate_numbers_of_iterations_loop (str
>  
>    infer_loop_bounds_from_undefined (loop);
>  
> +  discover_iteration_bound_by_body_walk (loop);
> +
>    maybe_lower_iteration_bound (loop);
>  
>    /* If we have a measured profile, use it to estimate the number of
> Index: testsuite/gcc.dg/tree-ssa/loop-38.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/loop-38.c	(revision 0)
> +++ testsuite/gcc.dg/tree-ssa/loop-38.c	(revision 0)
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-cunrolli-details" } */
> +int a[10];
> +int b[11];
> +t(int n)
> +{
> +   int i;
> +   int sum = 0;
> +   for (i=0;i<n;i++)
> +     if (q())
> +	sum+=a[i];
> +     else
> +	sum+=b[i];
> +  return sum;
> +}
> +/* { dg-final { scan-tree-dump "Found better loop bound 11" "cunrolli" } } */
> +/* { dg-final { scan-tree-dump "Loop 1 iterates at most 11 times" "cunrolli" } } */
> +/* { dg-final { cleanup-tree-dump "cunrolli" } } */
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]