Re: Fix estimation of array accesses


On Thu, 25 Oct 2012, Jan Hubicka wrote:

> Hi,
> while looking into cunroll issues I noticed that we miss quite a lot of important
> unrolling at -O2 for EON because we think it will increase code size.  This is
> because we do not account for the fact that making an array index constant is a
> good thing.
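
For illustration, a made-up example (not taken from EON) of the effect
being described - after complete unrolling the array index is a
constant and the loads fold away:

  /* Hypothetical example: coef[i] in the unrolled body becomes
     coef[0] .. coef[3], i.e. the constants 1.0 .. 4.0, so the four
     "loads" cost nothing.  */
  static const double coef[4] = { 1.0, 2.0, 3.0, 4.0 };

  double
  poly4 (double x)
  {
    double r = 0.0;
    int i;

    for (i = 0; i < 4; i++)
      r = r * x + coef[i];
    return r;
  }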

Btw, this reminds me of work I was doing one and a half years ago ...
rewriting the unroll heuristic to consider the whole nest instead
of iteratively unrolling (and thus fighting with the fact that the
just-unrolled bodies are not yet fully optimized).  The heuristic
tries to discover CSE opportunities - it was designed to help figure
out that complete unrolling of a 5-level deep loop nest (calculix ...)
is profitable.  It went nowhere (it still needs a large
max-unroll-insns increase).
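
For a flavour of the kind of nest this was aimed at, here is a made-up
3-level example (the real motivation was a 5-level calculix nest);
estimating each loop in isolation misses that after unrolling the
whole nest every index is constant and the loads largely CSE:

  /* Hypothetical example: unrolling only the innermost loop leaves
     a[i][k] and b[k][j] with variable indices, so the per-loop size
     estimate looks bad.  Unrolling the whole nest gives 27 a[i][k]
     loads with constant indices that CSE down to 9 distinct ones,
     and likewise for b[k][j].  */
  double
  dot3 (double a[3][3], double b[3][3])
  {
    double s = 0.0;
    int i, j, k;

    for (i = 0; i < 3; i++)
      for (j = 0; j < 3; j++)
        for (k = 0; k < 3; k++)
          s += a[i][k] * b[k][j];
    return s;
  }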

Patches are included for reference (not sure what the 2nd on top of
the 1st adds ;))

Of course they very likely don't apply anymore.

Richard.

Index: gcc/tree-ssa-loop-ivcanon.c
===================================================================
*** gcc/tree-ssa-loop-ivcanon.c.orig	2011-03-31 15:01:35.000000000 +0200
--- gcc/tree-ssa-loop-ivcanon.c	2011-03-31 15:32:43.000000000 +0200
*************** tree_num_loop_insns (struct loop *loop,
*** 131,147 ****
  struct loop_size
  {
    /* Number of instructions in the loop.  */
!   int overall;
  
    /* Number of instructions that will be likely optimized out in
       peeled iterations of loop  (i.e. computation based on induction
       variable where induction variable starts at known constant.)  */
!   int eliminated_by_peeling;
  
    /* Same statistics for last iteration of loop: it is smaller because
       instructions after exit are not executed.  */
!   int last_iteration;
!   int last_iteration_eliminated_by_peeling;
  };
  
  /* Return true if OP in STMT will be constant after peeling LOOP.  */
--- 131,147 ----
  struct loop_size
  {
    /* Number of instructions in the loop.  */
!   unsigned int overall;
  
    /* Number of instructions that will be likely optimized out in
       peeled iterations of loop  (i.e. computation based on induction
       variable where induction variable starts at known constant.)  */
!   unsigned int eliminated_by_peeling;
  
    /* Same statistics for last iteration of loop: it is smaller because
       instructions after exit are not executed.  */
!   unsigned int last_iteration;
!   unsigned int last_iteration_eliminated_by_peeling;
  };
  
  /* Return true if OP in STMT will be constant after peeling LOOP.  */
*************** estimated_unrolled_size (struct loop_siz
*** 316,321 ****
--- 316,379 ----
    return unr_insns;
  }
  
+ /* Unroll LOOP completely, i.e. NITER times.
+    EXIT is the exit of the loop that should be eliminated.  */
+ 
+ static bool
+ unroll_loop_completely (struct loop *loop, edge exit,
+ 			unsigned HOST_WIDE_INT n_unroll)
+ {
+   gimple cond;
+ 
+   if (n_unroll)
+     {
+       sbitmap wont_exit;
+       edge e;
+       unsigned i;
+       VEC (edge, heap) *to_remove = NULL;
+ 
+       initialize_original_copy_tables ();
+       wont_exit = sbitmap_alloc (n_unroll + 1);
+       sbitmap_ones (wont_exit);
+       RESET_BIT (wont_exit, 0);
+ 
+       if (!gimple_duplicate_loop_to_header_edge (loop,
+ 						 loop_preheader_edge (loop),
+ 						 n_unroll, wont_exit,
+ 						 exit, &to_remove,
+ 						 DLTHE_FLAG_UPDATE_FREQ
+ 						 | DLTHE_FLAG_COMPLETTE_PEEL))
+ 	{
+           free_original_copy_tables ();
+ 	  free (wont_exit);
+ 	  return false;
+ 	}
+ 
+       FOR_EACH_VEC_ELT (edge, to_remove, i, e)
+ 	{
+ 	  bool ok = remove_path (e);
+ 	  gcc_assert (ok);
+ 	}
+ 
+       VEC_free (edge, heap, to_remove);
+       free (wont_exit);
+       free_original_copy_tables ();
+     }
+ 
+   cond = last_stmt (exit->src);
+   if (exit->flags & EDGE_TRUE_VALUE)
+     gimple_cond_make_true (cond);
+   else
+     gimple_cond_make_false (cond);
+   update_stmt (cond);
+   update_ssa (TODO_update_ssa);
+ 
+   if (dump_file && (dump_flags & TDF_DETAILS))
+     fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
+ 
+   return true;
+ }
+ 
  /* Tries to unroll LOOP completely, i.e. NITER times.
     UL determines which loops we are allowed to unroll.
     EXIT is the exit of the loop that should be eliminated.  */
*************** try_unroll_loop_completely (struct loop
*** 326,332 ****
  			    enum unroll_level ul)
  {
    unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
-   gimple cond;
    struct loop_size size;
  
    if (loop->inner)
--- 384,389 ----
*************** try_unroll_loop_completely (struct loop
*** 376,427 ****
  	}
      }
  
!   if (n_unroll)
!     {
!       sbitmap wont_exit;
!       edge e;
!       unsigned i;
!       VEC (edge, heap) *to_remove = NULL;
! 
!       initialize_original_copy_tables ();
!       wont_exit = sbitmap_alloc (n_unroll + 1);
!       sbitmap_ones (wont_exit);
!       RESET_BIT (wont_exit, 0);
  
!       if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
! 						 n_unroll, wont_exit,
! 						 exit, &to_remove,
! 						 DLTHE_FLAG_UPDATE_FREQ
! 						 | DLTHE_FLAG_COMPLETTE_PEEL))
! 	{
!           free_original_copy_tables ();
! 	  free (wont_exit);
! 	  return false;
! 	}
  
!       FOR_EACH_VEC_ELT (edge, to_remove, i, e)
! 	{
! 	  bool ok = remove_path (e);
! 	  gcc_assert (ok);
! 	}
  
!       VEC_free (edge, heap, to_remove);
!       free (wont_exit);
!       free_original_copy_tables ();
      }
- 
-   cond = last_stmt (exit->src);
-   if (exit->flags & EDGE_TRUE_VALUE)
-     gimple_cond_make_true (cond);
    else
!     gimple_cond_make_false (cond);
!   update_stmt (cond);
!   update_ssa (TODO_update_ssa);
  
!   if (dump_file && (dump_flags & TDF_DETAILS))
!     fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
  
!   return true;
  }
  
  /* Adds a canonical induction variable to LOOP if suitable.
--- 433,475 ----
  	}
      }
  
!   return unroll_loop_completely (loop, exit, n_unroll);
! }
  
! /* Try to determine and return the number of iterations of LOOP.  If
!    we succeed, an expression giving the number of iterations is returned
!    and *EXIT is set to the edge from that the information is obtained.
!    Otherwise chrec_dont_know is returned.
!    If TRY_EVAL is true, we try to determine the number of iterations of
!    a loop by direct evaluation.  */
  
! static tree
! find_loop_niter_and_exit (struct loop *loop, bool try_eval, edge *exit)
! {
!   tree niter;
  
!   niter = number_of_latch_executions (loop);
!   if (TREE_CODE (niter) == INTEGER_CST)
!     {
!       *exit = single_exit (loop);
!       if (!just_once_each_iteration_p (loop, (*exit)->src))
! 	return chrec_dont_know;
      }
    else
!     {
!       /* If the loop has more than one exit, try checking all of them
! 	 for # of iterations determinable through scev.  */
!       if (!single_exit (loop))
! 	niter = find_loop_niter (loop, exit);
  
!       /* Finally if everything else fails, try brute force evaluation.  */
!       if (try_eval
! 	  && (chrec_contains_undetermined (niter)
! 	      || TREE_CODE (niter) != INTEGER_CST))
! 	niter = find_loop_niter_by_eval (loop, exit);
!     }
  
!   return niter;
  }
  
  /* Adds a canonical induction variable to LOOP if suitable.
*************** canonicalize_loop_induction_variables (s
*** 438,467 ****
    edge exit = NULL;
    tree niter;
  
!   niter = number_of_latch_executions (loop);
!   if (TREE_CODE (niter) == INTEGER_CST)
!     {
!       exit = single_exit (loop);
!       if (!just_once_each_iteration_p (loop, exit->src))
! 	return false;
!     }
!   else
!     {
!       /* If the loop has more than one exit, try checking all of them
! 	 for # of iterations determinable through scev.  */
!       if (!single_exit (loop))
! 	niter = find_loop_niter (loop, &exit);
! 
!       /* Finally if everything else fails, try brute force evaluation.  */
!       if (try_eval
! 	  && (chrec_contains_undetermined (niter)
! 	      || TREE_CODE (niter) != INTEGER_CST))
! 	niter = find_loop_niter_by_eval (loop, &exit);
! 
!       if (chrec_contains_undetermined (niter)
! 	  || TREE_CODE (niter) != INTEGER_CST)
! 	return false;
!     }
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
--- 486,495 ----
    edge exit = NULL;
    tree niter;
  
!   niter = find_loop_niter_and_exit (loop, try_eval, &exit);
!   if (chrec_contains_undetermined (niter)
!       || TREE_CODE (niter) != INTEGER_CST)
!     return false;
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
*************** canonicalize_induction_variables (void)
*** 505,510 ****
--- 533,793 ----
    return 0;
  }
  
+ 
+ struct stmt_info {
+   unsigned int dependent_on;
+   unsigned int constant_in;
+ };
+ struct stmt_info *stmt_infos;
+ 
+ static struct stmt_info *
+ get_stmt_info (gimple stmt)
+ {
+   return &stmt_infos[gimple_uid (stmt)];
+ }
+ 
+ static void
+ set_dependent_on (gimple stmt, unsigned int dependent_on)
+ {
+   get_stmt_info (stmt)->dependent_on = dependent_on;
+ }
+ 
+ static void
+ set_constant_in (gimple stmt, unsigned int constant_in)
+ {
+   get_stmt_info (stmt)->constant_in = constant_in;
+ }
+ 
+ static unsigned int
+ op_dependent_on (tree op)
+ {
+   if (DECL_P (op)
+       || is_gimple_min_invariant (op))
+     return 0;
+ 
+   if (TREE_CODE (op) == SSA_NAME)
+     {
+       gimple def_stmt = SSA_NAME_DEF_STMT (op);
+ 
+       if (gimple_nop_p (def_stmt))
+ 	return 0;
+       return get_stmt_info (def_stmt)->dependent_on;
+     }
+ 
+   if (TREE_CODE (op) == ADDR_EXPR)
+     op = TREE_OPERAND (op, 0);
+ 
+   if (REFERENCE_CLASS_P (op))
+     {
+       unsigned int dependent_on = 0;
+       while (handled_component_p (op)
+ 	     || TREE_CODE (op) == MEM_REF
+ 	     || TREE_CODE (op) == TARGET_MEM_REF)
+ 	{
+ 	  unsigned i;
+ 	  for (i = 1; i < TREE_CODE_LENGTH (TREE_CODE (op)); ++i)
+ 	    if (TREE_OPERAND (op, i))
+ 	      dependent_on |= op_dependent_on (TREE_OPERAND (op, i));
+ 	  op = TREE_OPERAND (op, 0);
+ 	}
+       return dependent_on | op_dependent_on (op);
+     }
+ 
+   return ~0U;
+ }
+ 
+ static unsigned int
+ op_constant_in (tree op)
+ {
+   if (is_gimple_min_invariant (op))
+     return ~0;
+ 
+   if (TREE_CODE (op) == SSA_NAME)
+     {
+       gimple def_stmt = SSA_NAME_DEF_STMT (op);
+ 
+       if (gimple_nop_p (def_stmt))
+ 	return 0;
+       return get_stmt_info (def_stmt)->constant_in;
+     }
+ 
+   return 0;
+ }
+ 
+ struct unroll_info {
+     struct loop *loop;
+     edge exit;
+     tree niter;
+     bool unroll_p;
+ };
+ 
+ static void
+ unroll_cost_for_loop (struct loop *loop, struct unroll_info *uinfo,
+ 		      unsigned int *original_size_,
+ 		      unsigned int *original_time_,
+ 		      unsigned int *unrolled_size_,
+ 		      unsigned int *unrolled_time_)
+ {
+   basic_block *bbs = get_loop_body (loop);
+   unsigned i;
+   unsigned int original_size = 0;
+   unsigned int unrolled_size = 0;
+   unsigned int original_time = 0;
+   unsigned int unrolled_time = 0;
+ 
+   for (i = 0; i < loop->num_nodes; ++i)
+     {
+       basic_block bb = bbs[i];
+       gimple_stmt_iterator gsi;
+       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ 	{
+ 	  gimple phi = gsi_stmt (gsi);
+ 	  unsigned int dependent_on = get_stmt_info (phi)->dependent_on;
+ 	  unsigned int constant_in = get_stmt_info (phi)->constant_in;
+ 	  unsigned int this_cost = 1;
+ 	  unsigned int this_original_size, this_original_time;
+ 	  unsigned int this_unrolled_time;
+ 	  struct loop *ploop;
+ 
+ 	  /* PHIs in loop headers are free.  */
+ 	  if (bb->loop_father->header == bb)
+ 	    this_cost = 0;
+ 
+ 	  this_original_size = this_cost;
+ 	  original_size += this_original_size;
+ 	  this_original_time = this_cost
+ 	      * (TREE_INT_CST_LOW (uinfo[bb->loop_father->num].niter) + 1);
+ 	  original_time += this_original_time;
+ 
+ 	  /* If all loops this stmt is _not_ constant in are unrolled
+ 	     the stmt will optimize to a constant.  */
+ 	  for (ploop = bb->loop_father;; ploop = loop_outer (ploop))
+ 	    {
+ 	      /* If the stmt is constant in this loop then its free.  */
+ 	      if (constant_in & (1 << loop_depth (ploop)))
+ 		{
+ 		  this_cost = 0;
+ 		  break;
+ 		}
+ 	      if (ploop == loop_outer (loop))
+ 		break;
+ 	      if (!uinfo[ploop->num].unroll_p)
+ 		break;
+ 	    }
+ 
+ 	  /* It's unlikely that we can CSE control flow, so don't assume
+ 	     any simplifications when PHIs become invariant.  */
+ 	  this_unrolled_time = this_cost
+ 	    * (TREE_INT_CST_LOW (uinfo[bb->loop_father->num].niter) + 1);
+ 	  unrolled_time += this_unrolled_time;
+ 
+ 	  if (dump_file)
+ 	    {
+ 	      print_gimple_stmt (dump_file, phi, 0, dump_flags);
+ 	      fprintf (dump_file,
+ 		       "  dependent %u, constant in %u, unrolled size %u,"
+ 		       " original size %u, unrolled time %u, original time %u\n",
+ 		       dependent_on, constant_in, this_cost,
+ 		       this_original_size, this_unrolled_time,
+ 		       this_original_time);
+ 	    }
+ 	  unrolled_size += this_cost;
+ 	}
+       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ 	{
+ 	  gimple stmt = gsi_stmt (gsi);
+ 	  unsigned int this_cost = 1;
+ 	  unsigned int this_original_time, this_unrolled_time;
+ 	  unsigned int this_original_size;
+ 	  unsigned int dependent_on = get_stmt_info (stmt)->dependent_on;
+ 	  unsigned int constant_in = get_stmt_info (stmt)->constant_in;
+ 	  struct loop *ploop;
+ 
+ 	  /* If the stmt is constant in this loop then its free.  */
+ 	  if (constant_in & (1 << loop_depth (bb->loop_father)))
+ 	    this_cost = 0;
+ 
+ 	  this_original_size = this_cost;
+ 	  original_size += this_original_size;
+ 
+ 	  /* If the loop is not unrolled or the stmt does not depend
+ 	     on this loops induction variables it can be CSEd.  */
+ 	  this_original_time = this_cost;
+ 	  for (ploop = bb->loop_father;
+ 	       ploop != loop_outer (loop);
+ 	       ploop = loop_outer (ploop))
+ 	    if (dependent_on & (1 << loop_depth (ploop)))
+ 	      break;
+ 	  for (; ploop != loop_outer (loop); ploop = loop_outer (ploop))
+ 	    this_original_time
+ 	      *= TREE_INT_CST_LOW (uinfo[ploop->num].niter) + 1;
+ 	  original_time += this_original_time;
+ 
+ 	  /* If all loops this stmt is _not_ constant in are unrolled
+ 	     the stmt will optimize to a constant.  */
+ 	  for (ploop = bb->loop_father;; ploop = loop_outer (ploop))
+ 	    {
+ 	      /* If the stmt is constant in this loop then its free.  */
+ 	      if (constant_in & (1 << loop_depth (ploop)))
+ 		{
+ 		  this_cost = 0;
+ 		  break;
+ 		}
+ 	      if (ploop == loop_outer (loop))
+ 		break;
+ 	      if (!uinfo[ploop->num].unroll_p)
+ 		break;
+ 	    }
+ 
+ 	  /* If the loop is not unrolled or the stmt does not depend
+ 	     on this loops induction variables it can be CSEd.  */
+ 	  this_unrolled_time = this_cost;
+ 	  for (ploop = bb->loop_father;; ploop = loop_outer (ploop))
+ 	    {
+ 	      if (dependent_on & (1 << loop_depth (ploop)))
+ 		{
+ 		  if (uinfo[ploop->num].unroll_p)
+ 		    this_cost
+ 		      *= TREE_INT_CST_LOW (uinfo[ploop->num].niter) + 1;
+ 		  this_unrolled_time
+ 		    *= TREE_INT_CST_LOW (uinfo[ploop->num].niter) + 1;
+ 		}
+ 	      if (ploop == loop)
+ 		break;
+ 	    }
+ 	  unrolled_time += this_unrolled_time;
+ 
+ 	  if (dump_file)
+ 	    {
+ 	      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
+ 	      fprintf (dump_file,
+ 		       "  dependent %u, constant in %u, unrolled size %u,"
+ 		       " original size %u, unrolled time %u, original time %u\n",
+ 		       dependent_on, constant_in, this_cost,
+ 		       this_original_size, this_unrolled_time,
+ 		       this_original_time);
+ 	    }
+ 	  unrolled_size += this_cost;
+ 	}
+     }
+ 
+   free (bbs);
+ 
+   if (dump_file)
+     {
+       fprintf (dump_file,
+ 	       "  Overall loop original size %u, unrolled size %u, "
+ 	       "original time %u, unrolled time %u\n",
+ 	       original_size, unrolled_size, original_time, unrolled_time);
+     }
+ 
+ 
+   *original_size_ = original_size;
+   *unrolled_size_ = unrolled_size;
+   *original_time_ = original_time;
+   *unrolled_time_ = unrolled_time;
+ }
+ 
  /* Unroll LOOPS completely if they iterate just few times.  Unless
     MAY_INCREASE_SIZE is true, perform the unrolling only if the
     size of the code does not increase.  */
*************** tree_unroll_loops_completely (bool may_i
*** 515,539 ****
    loop_iterator li;
    struct loop *loop;
    bool changed;
!   enum unroll_level ul;
!   int iteration = 0;
  
    do
      {
        changed = false;
  
        FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
  	{
! 	  if (may_increase_size && optimize_loop_for_speed_p (loop)
! 	      /* Unroll outermost loops only if asked to do so or they do
! 		 not cause code growth.  */
! 	      && (unroll_outer
! 		  || loop_outer (loop_outer (loop))))
! 	    ul = UL_ALL;
! 	  else
! 	    ul = UL_NO_GROWTH;
! 	  changed |= canonicalize_loop_induction_variables
! 		       (loop, false, ul, !flag_tree_loop_ivcanon);
  	}
  
        if (changed)
--- 798,992 ----
    loop_iterator li;
    struct loop *loop;
    bool changed;
!   struct unroll_info *uinfo;
!   unsigned i;
! 
!   renumber_gimple_stmt_uids ();
! 
!   stmt_infos = XCNEWVEC (struct stmt_info, gimple_stmt_max_uid (cfun));
!   uinfo = XCNEWVEC (struct unroll_info, number_of_loops ());
! 
!   for (i = 0; i < gimple_stmt_max_uid (cfun); ++i)
!     {
!       stmt_infos[i].dependent_on = ~0;
!       stmt_infos[i].constant_in = 0;
!     }
! 
!   for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
!     {
!       basic_block *bbs = get_loop_body_in_dom_order (loop);
!       unsigned i;
!       for (i = 0; i < loop->num_nodes; ++i)
! 	{
! 	  gimple_stmt_iterator gsi;
! 	  basic_block bb = bbs[i];
! 
! 	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
! 	    {
! 	      gimple phi = gsi_stmt (gsi);
! 	      struct loop *sloop = loop_containing_stmt (phi);
! 	      unsigned dependent_on = 0;
! 	      unsigned constant_in = 0;
! 	      unsigned j;
! 	      affine_iv iv;
! 	      for (j = 0; j < gimple_phi_num_args (phi); ++j)
! 		{
! 		  tree arg = gimple_phi_arg_def (phi, j);
! 		  /* A loop backedge makes the stmt loop dependent.  */
! 		  if (gimple_phi_arg_edge (phi, j)->src == sloop->latch)
! 		    dependent_on |= 1 << loop_depth (sloop);
! 		  else
! 		    dependent_on |= op_dependent_on (arg);
! 		  constant_in &= op_constant_in (arg);
! 		}
! 	      if (simple_iv (sloop, sloop, gimple_phi_result (phi), &iv, false)
! 		  && is_gimple_min_invariant (iv.base)
! 		  && is_gimple_min_invariant (iv.step))
! 		constant_in = (1 << loop_depth (sloop)) - 1;
! 	      set_dependent_on (phi, dependent_on);
! 	      set_constant_in (phi, constant_in);
! 	    }
! 	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
! 	    {
! 	      gimple stmt = gsi_stmt (gsi);
! 	      tree fndecl;
! 	      unsigned int dependent_on = ~0U;
! 	      unsigned int constant_in = 0;
! 	      if (is_gimple_assign (stmt))
! 		{
! 		  unsigned j;
! 		  /* For stores DSE applies dependent on the LHS.  */
! 		  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
! 		    dependent_on = op_dependent_on (gimple_assign_lhs (stmt));
! 		  else
! 		    {
! 		    dependent_on = op_dependent_on (gimple_op (stmt, 1));
! 		    constant_in = op_constant_in (gimple_op (stmt, 1));
! 		    for (j = 2; j < gimple_num_ops (stmt); ++j)
! 		      {
! 			dependent_on |= op_dependent_on (gimple_op (stmt, j));
! 			constant_in &= op_constant_in (gimple_op (stmt, j));
! 		      }
! 		    }
! 		}
! 	      else if (gimple_code (stmt) == GIMPLE_COND)
! 		{
! 		  dependent_on = op_dependent_on (gimple_cond_lhs (stmt));
! 		  dependent_on |= op_dependent_on (gimple_cond_rhs (stmt));
! 		  constant_in = op_constant_in (gimple_cond_lhs (stmt));
! 		  constant_in &= op_constant_in (gimple_cond_rhs (stmt));
! 		}
! 	      else if (is_gimple_call (stmt)
! 		       && (fndecl = gimple_call_fndecl (stmt)) != NULL_TREE
! 		       && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
! 		       && gimple_call_flags (stmt) & (ECF_PURE|ECF_CONST))
! 		{
! 		  unsigned j;
! 		  dependent_on = 0;
! 		  constant_in = ~0L;
! 		  for (j = 1; j < gimple_num_ops (stmt); ++j)
! 		    {
! 		      if (!gimple_op (stmt, j))
! 			continue;
! 		      dependent_on |= op_dependent_on (gimple_op (stmt, j));
! 		      constant_in &= op_constant_in (gimple_op (stmt, j));
! 		    }
! 		  /* Do not assume we can constant fold pure functions.  */
! 		  if (!(gimple_call_flags (stmt) & ECF_CONST))
! 		    constant_in = 0;
! 		}
! 	      set_dependent_on (stmt, dependent_on);
! 	      set_constant_in (stmt, constant_in);
! 	    }
! 	}
! 
!       free (bbs);
!     }
  
+   /* Find a niter/exit pair for each loop.  */
+   FOR_EACH_LOOP (li, loop, 0)
+     {
+       struct unroll_info *ui = &uinfo[loop->num];
+       if (!loop_outer (loop))
+ 	continue;
+ 
+       ui->niter = find_loop_niter_and_exit (loop, false, &ui->exit);
+       ui->unroll_p = (host_integerp (ui->niter, 1)
+ 		      && (TREE_INT_CST_LOW (ui->niter)
+ 			  <= (unsigned HOST_WIDE_INT)
+ 			       PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)));
+     }
+ 
+   /* Remove the unroll flag on loops that are off limits.  */
+   FOR_EACH_LOOP (li, loop, 0)
+     {
+       struct unroll_info *ui = &uinfo[loop->num];
+       unsigned int original_size;
+       unsigned int unrolled_size;
+       unsigned int original_time;
+       unsigned int unrolled_time;
+ 
+       if (!loop_outer (loop))
+ 	continue;
+ 
+       if (!unroll_outer
+ 	  && !loop_outer (loop_outer (loop)))
+ 	{
+ 	  ui->unroll_p = false;
+ 	  continue;
+ 	}
+ 
+       /* If we decided to unroll our parent loop then we have committed
+          to unrolling ourselves (if possible).  Don't change that.  */
+       if (uinfo[loop_outer (loop)->num].unroll_p)
+ 	continue;
+ 
+       if (dump_file && (dump_flags & TDF_DETAILS))
+ 	fprintf (dump_file, "Considering loop %u and siblings\n", loop->num);
+ 
+       unroll_cost_for_loop (loop, uinfo, &original_size, &original_time,
+ 			    &unrolled_size, &unrolled_time);
+       if (dump_file && (dump_flags & TDF_DETAILS))
+ 	fprintf (dump_file, "  Loop size: %u\n", original_size);
+       if (dump_file && (dump_flags & TDF_DETAILS))
+ 	fprintf (dump_file, "  Estimated size after unrolling: %u\n",
+ 		 unrolled_size);
+       if (!may_increase_size
+ 	  && original_size < unrolled_size)
+ 	{
+ 	  if (dump_file && (dump_flags & TDF_DETAILS))
+ 	    fprintf (dump_file, "Not unrolling loop %d (size would grow).\n",
+ 		     loop->num);
+ 	  ui->unroll_p = false;
+ 	}
+       else if (unrolled_size > original_size
+ 	       && (unrolled_size
+ 		   > (unsigned int)
+ 		        PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
+ 	{
+ 	  if (dump_file && (dump_flags & TDF_DETAILS))
+ 	    fprintf (dump_file, "Not unrolling loop %d "
+ 		     "(--param max-completely-peeled-insns limit reached).\n",
+ 		     loop->num);
+ 	  ui->unroll_p = false;
+ 	}
+     }
+ 
+   if (dump_file)
+     dump_function_to_file (cfun->decl, dump_file, dump_flags);
+ 
+   /* And finally commit the unroll decision.  */
    do
      {
        changed = false;
  
        FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
  	{
! 	  struct unroll_info *ui = &uinfo[loop->num];
! 	  if (!ui->unroll_p)
! 	    continue;
! 	  changed |= unroll_loop_completely (loop, ui->exit,
! 					     TREE_INT_CST_LOW (ui->niter));
  	}
  
        if (changed)
*************** tree_unroll_loops_completely (bool may_i
*** 549,556 ****
  	  scev_reset ();
  	}
      }
!   while (changed
! 	 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));
  
    return 0;
  }
--- 1002,1011 ----
  	  scev_reset ();
  	}
      }
!   while (changed);
! 
!   free (uinfo);
!   free (stmt_infos);
  
    return 0;
  }



------------



Index: trunk/gcc/tree-ssa-loop-ivcanon.c
===================================================================
*** trunk.orig/gcc/tree-ssa-loop-ivcanon.c	2011-04-06 12:53:47.000000000 +0200
--- trunk/gcc/tree-ssa-loop-ivcanon.c	2011-04-07 14:40:04.000000000 +0200
*************** canonicalize_induction_variables (void)
*** 533,559 ****
    return 0;
  }
  
  
  struct stmt_info {
    unsigned int dependent_on;
    unsigned int constant_in;
  };
  struct stmt_info *stmt_infos;
  
  static struct stmt_info *
! get_stmt_info (gimple stmt)
  {
    return &stmt_infos[gimple_uid (stmt)];
  }
  
  static void
! set_dependent_on (gimple stmt, unsigned int dependent_on)
  {
    get_stmt_info (stmt)->dependent_on = dependent_on;
  }
  
  static void
! set_constant_in (gimple stmt, unsigned int constant_in)
  {
    get_stmt_info (stmt)->constant_in = constant_in;
  }
--- 533,569 ----
    return 0;
  }
  
+ typedef struct {
+   unsigned int loop_index;
+   tree step;
+ } stmt_ref_index_t;
+ 
+ DEF_VEC_O(stmt_ref_index_t);
+ DEF_VEC_ALLOC_O(stmt_ref_index_t,heap);
  
  struct stmt_info {
    unsigned int dependent_on;
    unsigned int constant_in;
+   tree base;
+   tree index_base;
+   VEC(stmt_ref_index_t, heap) *indices;
  };
  struct stmt_info *stmt_infos;
  
  static struct stmt_info *
! get_stmt_info (const_gimple stmt)
  {
    return &stmt_infos[gimple_uid (stmt)];
  }
  
  static void
! set_dependent_on (const_gimple stmt, unsigned int dependent_on)
  {
    get_stmt_info (stmt)->dependent_on = dependent_on;
  }
  
  static void
! set_constant_in (const_gimple stmt, unsigned int constant_in)
  {
    get_stmt_info (stmt)->constant_in = constant_in;
  }
*************** struct unroll_info {
*** 621,626 ****
--- 631,680 ----
      bool unroll_p;
  };
  
+ static hashval_t
+ hash_stmt_access (const void *stmt_)
+ {
+   const_gimple stmt = (const_gimple) stmt_;
+   struct stmt_info *si = get_stmt_info (stmt);
+   return iterative_hash_expr (si->base, 0);
+ }
+ 
+ static struct unroll_info *current_uinfo;
+ static int
+ stmt_accesses_equal_p (const void *stmt1_, const void *stmt2_)
+ {
+   struct unroll_info *uinfo = current_uinfo;
+   const_gimple stmt1 = (const_gimple) stmt1_;
+   const_gimple stmt2 = (const_gimple) stmt2_;
+   struct stmt_info *i1 = get_stmt_info (stmt1);
+   struct stmt_info *i2 = get_stmt_info (stmt2);
+   unsigned i;
+   stmt_ref_index_t *idx1, *idx2;
+   if (!i1->base || !i2->base)
+     return false;
+   if (VEC_length (stmt_ref_index_t, i1->indices)
+       != VEC_length (stmt_ref_index_t, i2->indices))
+     return false;
+   if (!operand_equal_p (i1->base, i2->base, 0)
+       || !operand_equal_p (i1->index_base, i2->index_base, 0))
+     return false;
+   FOR_EACH_VEC_ELT (stmt_ref_index_t, i1->indices, i, idx1)
+     {
+       idx2 = VEC_index (stmt_ref_index_t, i2->indices, i);
+       if (!operand_equal_p (idx1->step, idx2->step, 0))
+ 	return false;
+       if (idx1->loop_index == idx2->loop_index)
+ 	continue;
+       if (!uinfo[idx1->loop_index].unroll_p
+ 	  || !uinfo[idx2->loop_index].unroll_p)
+ 	return false;
+       if (!tree_int_cst_equal (uinfo[idx1->loop_index].niter,
+ 			       uinfo[idx2->loop_index].niter))
+ 	return false;
+     }
+   return true;
+ }
+ 
  static void
  unroll_cost_for_loop (struct loop *loop, struct unroll_info *uinfo,
  		      unsigned int *original_size_,
*************** unroll_cost_for_loop (struct loop *loop,
*** 634,640 ****
--- 688,698 ----
    unsigned int unrolled_size = 0;
    unsigned int original_time = 0;
    unsigned int unrolled_time = 0;
+   htab_t visited_accesses;
  
+   current_uinfo = uinfo;
+   visited_accesses = htab_create (23, hash_stmt_access,
+ 				  stmt_accesses_equal_p, NULL);
    for (i = 0; i < loop->num_nodes; ++i)
      {
        basic_block bb = bbs[i];
*************** unroll_cost_for_loop (struct loop *loop,
*** 699,707 ****
  	  unsigned int this_cost = 1;
  	  unsigned int this_original_time, this_unrolled_time;
  	  unsigned int this_original_size;
! 	  unsigned int dependent_on = get_stmt_info (stmt)->dependent_on;
! 	  unsigned int constant_in = get_stmt_info (stmt)->constant_in;
  	  struct loop *ploop;
  
  	  /* If the stmt is constant in this loop then its free.  */
  	  if (constant_in & (1 << loop_depth (bb->loop_father)))
--- 757,767 ----
  	  unsigned int this_cost = 1;
  	  unsigned int this_original_time, this_unrolled_time;
  	  unsigned int this_original_size;
! 	  struct stmt_info *si = get_stmt_info (stmt);
! 	  unsigned int dependent_on = si->dependent_on;
! 	  unsigned int constant_in = si->constant_in;
  	  struct loop *ploop;
+ 	  bool redundant_access_fn = false;
  
  	  /* If the stmt is constant in this loop then its free.  */
  	  if (constant_in & (1 << loop_depth (bb->loop_father)))
*************** unroll_cost_for_loop (struct loop *loop,
*** 755,777 ****
  	      if (ploop == loop)
  		break;
  	    }
! 	  unrolled_time += this_unrolled_time;
  
  	  if (dump_file)
  	    {
  	      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
  	      fprintf (dump_file,
  		       "  dependent %u, constant in %u, unrolled size %u,"
! 		       " original size %u, unrolled time %u, original time %u\n",
  		       dependent_on, constant_in, this_cost,
  		       this_original_size, this_unrolled_time,
  		       this_original_time);
  	    }
- 	  unrolled_size += this_cost;
  	}
      }
  
    free (bbs);
  
    if (dump_file)
      {
--- 815,854 ----
  	      if (ploop == loop)
  		break;
  	    }
! 
! 	  if (si->base != NULL_TREE)
! 	    {
! 	      void **slot;
! 	      slot = htab_find_slot (visited_accesses, stmt, INSERT);
! 	      redundant_access_fn = (*slot != NULL);
! 	      if (!*slot)
! 		*slot = (void *) stmt;
! 	    }
  
  	  if (dump_file)
  	    {
  	      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
  	      fprintf (dump_file,
  		       "  dependent %u, constant in %u, unrolled size %u,"
! 		       " original size %u, unrolled time %u, original time %u",
  		       dependent_on, constant_in, this_cost,
  		       this_original_size, this_unrolled_time,
  		       this_original_time);
+ 	      if (redundant_access_fn)
+ 		fprintf (dump_file,
+ 			 ", access redundant");
+ 	      fprintf (dump_file, "\n");
+ 	    }
+ 	  if (!redundant_access_fn)
+ 	    {
+ 	      unrolled_time += this_unrolled_time;
+ 	      unrolled_size += this_cost;
  	    }
  	}
      }
  
    free (bbs);
+   htab_delete (visited_accesses);
  
    if (dump_file)
      {
*************** unroll_cost_for_loop (struct loop *loop,
*** 788,793 ****
--- 865,968 ----
    *unrolled_time_ = unrolled_time;
  }
  
+ /* Sort the vector of indices by constant step and then by loop index.  */
+ 
+ static int
+ compare_index (const void *i1_, const void *i2_)
+ {
+   const stmt_ref_index_t *i1 = (const stmt_ref_index_t *)i1_;
+   const stmt_ref_index_t *i2 = (const stmt_ref_index_t *)i2_;
+   if (TREE_CODE (i1->step) == INTEGER_CST)
+     {
+       if (TREE_CODE (i2->step) == INTEGER_CST)
+ 	{
+ 	  int res = tree_int_cst_compare (i1->step, i2->step);
+ 	  if (res == 0)
+ 	    return i1->loop_index - i2->loop_index;
+ 	  return res;
+ 	}
+       else
+ 	return -1;
+     }
+   else if (TREE_CODE (i2->step) == INTEGER_CST)
+     return 1;
+   return i1->loop_index - i2->loop_index;
+ }
+ 
+ static void
+ analyze_ref_evolution (gimple stmt, tree ref)
+ {
+   tree base;
+   HOST_WIDE_INT bitsize, bitpos;
+   tree offset, ev, iev;
+   enum machine_mode mode;
+   int dummy;
+   struct loop *loop = loop_containing_stmt (stmt);
+   struct stmt_info *sinfo = get_stmt_info (stmt);
+ 
+   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
+ 			      &dummy, &dummy, false);
+ 
+   /* ???  Eventually factor in a pointer base of a MEM_REF base.  */
+   if (!offset)
+     return;
+ 
+   if (dump_file)
+     {
+       fprintf (dump_file, "Evolutions for ");
+       print_generic_expr (dump_file, ref, 0);
+       fprintf (dump_file, ", offset ");
+       print_generic_expr (dump_file, offset, 0);
+       fprintf (dump_file, "\n");
+     }
+   ev = analyze_scalar_evolution (loop, offset);
+   if (dump_file && (dump_flags & TDF_DETAILS))
+     {
+       fprintf (dump_file, "  evolution ");
+       print_generic_expr (dump_file, ev, 0);
+       fprintf (dump_file, "\n");
+     }
+   iev = instantiate_scev (block_before_loop (current_loops->tree_root),
+ 			  loop, ev);
+   if (dump_file)
+     {
+       fprintf (dump_file, "  instantiated ");
+       print_generic_expr (dump_file, iev, 0);
+       fprintf (dump_file, "\n");
+     }
+ 
+   sinfo->base = NULL_TREE;
+ 
+   if (iev == chrec_dont_know)
+     return;
+ 
+   sinfo->base = base;
+   while (TREE_CODE (iev) == POLYNOMIAL_CHREC)
+     {
+       stmt_ref_index_t idx;
+       idx.loop_index = CHREC_VARIABLE (iev);
+       idx.step = CHREC_RIGHT (iev);
+       VEC_safe_push (stmt_ref_index_t, heap, sinfo->indices, &idx);
+       iev = CHREC_LEFT (iev);
+     }
+   sinfo->index_base = iev;
+   VEC_qsort (stmt_ref_index_t, sinfo->indices, compare_index);
+ 
+   if (dump_file)
+     {
+       stmt_ref_index_t *idx;
+       unsigned i;
+       fprintf (dump_file, "  sorted ");
+       FOR_EACH_VEC_ELT (stmt_ref_index_t, sinfo->indices, i, idx)
+ 	{
+ 	  print_generic_expr (dump_file, idx->step, 0);
+ 	  fprintf (dump_file, "@%d + ", idx->loop_index);
+ 	}
+       print_generic_expr (dump_file, sinfo->index_base, 0);
+       fprintf (dump_file, "\n");
+     }
+ }
+ 
  /* Unroll LOOPS completely if they iterate just few times.  Unless
     MAY_INCREASE_SIZE is true, perform the unrolling only if the
     size of the code does not increase.  */
*************** tree_unroll_loops_completely (bool may_i
*** 857,872 ****
  		  unsigned j;
  		  /* For stores DSE applies dependent on the LHS.  */
  		  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
! 		    dependent_on = op_dependent_on (gimple_assign_lhs (stmt));
  		  else
  		    {
! 		    dependent_on = op_dependent_on (gimple_op (stmt, 1));
! 		    constant_in = op_constant_in (gimple_op (stmt, 1));
! 		    for (j = 2; j < gimple_num_ops (stmt); ++j)
! 		      {
! 			dependent_on |= op_dependent_on (gimple_op (stmt, j));
! 			constant_in &= op_constant_in (gimple_op (stmt, j));
! 		      }
  		    }
  		}
  	      else if (gimple_code (stmt) == GIMPLE_COND)
--- 1032,1053 ----
  		  unsigned j;
  		  /* For stores DSE applies dependent on the LHS.  */
  		  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
! 		    {
! 		      if (REFERENCE_CLASS_P (gimple_assign_lhs (stmt)))
! 			analyze_ref_evolution (stmt, gimple_assign_lhs (stmt));
! 		      dependent_on = op_dependent_on (gimple_assign_lhs (stmt));
! 		    }
  		  else
  		    {
! 		      if (REFERENCE_CLASS_P (gimple_assign_rhs1 (stmt)))
! 			analyze_ref_evolution (stmt, gimple_assign_rhs1 (stmt));
! 		      dependent_on = op_dependent_on (gimple_op (stmt, 1));
! 		      constant_in = op_constant_in (gimple_op (stmt, 1));
! 		      for (j = 2; j < gimple_num_ops (stmt); ++j)
! 			{
! 			  dependent_on |= op_dependent_on (gimple_op (stmt, j));
! 			  constant_in &= op_constant_in (gimple_op (stmt, j));
! 			}
  		    }
  		}
  	      else if (gimple_code (stmt) == GIMPLE_COND)
*************** tree_unroll_loops_completely (bool may_i
*** 915,920 ****
--- 1096,1104 ----
  		      && (TREE_INT_CST_LOW (ui->niter)
  			  <= (unsigned HOST_WIDE_INT)
  			       PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)));
+       /* ???  FIXME.  */
+       if (!host_integerp (ui->niter, 1))
+ 	ui->niter = integer_one_node;
      }
  
    /* Remove the unroll flag on loops that are off limits.  */
*************** tree_unroll_loops_completely (bool may_i
*** 929,934 ****
--- 1113,1121 ----
        if (!loop_outer (loop))
  	continue;
  
+       if (!ui->unroll_p)
+ 	continue;
+ 
        if (!unroll_outer
  	  && !loop_outer (loop_outer (loop)))
  	{

