[patch] for PR 18601

Zdenek Dvorak rakdver@atrey.karlin.mff.cuni.cz
Thu Nov 25 03:52:00 GMT 2004


Hello,

the problem in this PR is that a long chain (2000) of forwarder
basic blocks is created, with each of them having two predecessors.

We are performing the jump threading edge by edge.  Edge to the first
block of the chain is threaded over 2000 basic blocks, the second one
over 1999 blocks, etc., thus obtaining quadratic behavior (that
shows the worst in dominance info updating, since it is the most
time consuming part of the process).

Cfg cleanup jump threading as it stands just removes forwarder blocks,
in a quite complicated way (by redirecting the edges one by one).
The easiest solution seems then to be to replace it by a pass that
only performs removal of forwarders directly (by redirecting all edges
entering the forwarder at once to the successor, and removing the
forwarder).  This fixes the PR and also simplifies the code quite a bit.

Compile time on "normal" code also seems to be a bit better (about
0.5% improvement on compilation of preprocessed gcc sources on i686).

Further improvements are possible.  In the cases similar to the PR there
still is a problem when forwarders with too large indegree are created,
since then redirecting all the edges during the forwarder removal could
again cause quadratic behavior (a small bit better than before, since
redirecting the edges is much faster than updating the dominators, but
still not nice).  This can be avoided by choosing the order of basic
blocks in the worklist more carefully (reverse dfs order should be
fine).  I would however like to first have this patch approved (or not)
before I spend more time with it.

Bootstrapped & regtested on i686 and ia64.

Zdenek

	PR tree-optimization/18601
	* tree-cfg.c (thread_jumps, phi_alternatives_equal,
	thread_jumps_from_bb): Removed.
	(tree_forwarder_block_p): Do not consider blocks that are its own
	successors forwarders.
	(cleanup_forwarder_blocks, remove_forwarder_block): New functions.
	(cleanup_tree_cfg): Use cleanup_forwarder_blocks instead of
	thread_jumps.

Index: tree-cfg.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-cfg.c,v
retrieving revision 2.120
diff -c -3 -p -r2.120 tree-cfg.c
*** tree-cfg.c	23 Nov 2004 22:38:07 -0000	2.120
--- tree-cfg.c	24 Nov 2004 20:35:16 -0000
*************** static void split_critical_edges (void);
*** 119,125 ****
  static inline bool stmt_starts_bb_p (tree, tree);
  static int tree_verify_flow_info (void);
  static void tree_make_forwarder_block (edge);
- static bool thread_jumps (void);
  static bool tree_forwarder_block_p (basic_block);
  static void tree_cfg2vcg (FILE *);
  
--- 119,124 ----
*************** static bool cleanup_control_expr_graph (
*** 132,138 ****
  static edge find_taken_edge_cond_expr (basic_block, tree);
  static edge find_taken_edge_switch_expr (basic_block, tree);
  static tree find_case_label_for_value (tree, tree);
! static bool phi_alternatives_equal (basic_block, edge, edge);
  
  
  /*---------------------------------------------------------------------------
--- 131,137 ----
  static edge find_taken_edge_cond_expr (basic_block, tree);
  static edge find_taken_edge_switch_expr (basic_block, tree);
  static tree find_case_label_for_value (tree, tree);
! static bool cleanup_forwarder_blocks (void);
  
  
  /*---------------------------------------------------------------------------
*************** cleanup_tree_cfg (void)
*** 900,910 ****
    retval = cleanup_control_flow ();
    retval |= delete_unreachable_blocks ();
  
!   /* thread_jumps can redirect edges out of SWITCH_EXPRs, which can get
!      expensive.  So we want to enable recording of edge to CASE_LABEL_EXPR
!      mappings around the call to thread_jumps.  */
    start_recording_case_labels ();
!   retval |= thread_jumps ();
    end_recording_case_labels ();
  
  #ifdef ENABLE_CHECKING
--- 899,909 ----
    retval = cleanup_control_flow ();
    retval |= delete_unreachable_blocks ();
  
!   /* cleanup_forwarder_blocks can redirect edges out of SWITCH_EXPRs, which can
!      get expensive.  So we want to enable recording of edge to CASE_LABEL_EXPR
!      mappings around the call to cleanup_forwarder_blocks.  */
    start_recording_case_labels ();
!   retval |= cleanup_forwarder_blocks ();
    end_recording_case_labels ();
  
  #ifdef ENABLE_CHECKING
*************** cleanup_tree_cfg (void)
*** 912,918 ****
      {
        gcc_assert (!cleanup_control_flow ());
        gcc_assert (!delete_unreachable_blocks ());
!       gcc_assert (!thread_jumps ());
      }
  #endif
  
--- 911,917 ----
      {
        gcc_assert (!cleanup_control_flow ());
        gcc_assert (!delete_unreachable_blocks ());
!       gcc_assert (!cleanup_forwarder_blocks ());
      }
  #endif
  
*************** find_case_label_for_value (tree switch_e
*** 2256,2291 ****
    return default_case;
  }
  
- 
- /* If all the PHI nodes in DEST have alternatives for E1 and E2 and
-    those alternatives are equal in each of the PHI nodes, then return
-    true, else return false.  */
- 
- static bool
- phi_alternatives_equal (basic_block dest, edge e1, edge e2)
- {
-   tree phi, val1, val2;
-   int n1, n2;
- 
-   for (phi = phi_nodes (dest); phi; phi = PHI_CHAIN (phi))
-     {
-       n1 = phi_arg_from_edge (phi, e1);
-       n2 = phi_arg_from_edge (phi, e2);
- 
-       gcc_assert (n1 >= 0);
-       gcc_assert (n2 >= 0);
- 
-       val1 = PHI_ARG_DEF (phi, n1);
-       val2 = PHI_ARG_DEF (phi, n2);
- 
-       if (!operand_equal_p (val1, val2, 0))
- 	return false;
-     }
- 
-   return true;
- }
- 
- 
  /*---------------------------------------------------------------------------
  			      Debugging functions
  ---------------------------------------------------------------------------*/
--- 2255,2260 ----
*************** tree_forwarder_block_p (basic_block bb)
*** 3899,3904 ****
--- 3868,3875 ----
        || phi_nodes (bb)
        /* BB may not be a predecessor of EXIT_BLOCK_PTR.  */
        || EDGE_SUCC (bb, 0)->dest == EXIT_BLOCK_PTR
+       /* Nor should this be an infinite loop.  */
+       || EDGE_SUCC (bb, 0)->dest == bb
        /* BB may not have an abnormal outgoing edge.  */
        || (EDGE_SUCC (bb, 0)->flags & EDGE_ABNORMAL))
      return false; 
*************** tree_forwarder_block_p (basic_block bb)
*** 3931,4211 ****
    return true;
  }
  
! /* Thread jumps from BB.  */
  
  static bool
! thread_jumps_from_bb (basic_block bb)
  {
    edge_iterator ei;
!   edge e;
!   bool retval = false;
  
!   /* Examine each of our block's successors to see if it is
!      forwardable.  */
!   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
!     {
!       int freq;
!       gcov_type count;
!       edge last, old;
!       basic_block dest, tmp, curr, old_dest;
!       tree phi;
!       int arg;
! 
!       /* If the edge is abnormal or its destination is not
! 	 forwardable, then there's nothing to do.  */
!       if ((e->flags & EDGE_ABNORMAL)
! 	  || !bb_ann (e->dest)->forwardable)
! 	{
! 	  ei_next (&ei);
! 	  continue;
! 	}
  
!       /* Now walk through as many forwarder blocks as possible to find
! 	 the ultimate destination we want to thread our jump to.  */
!       last = EDGE_SUCC (e->dest, 0);
!       bb_ann (e->dest)->forwardable = 0;
!       for (dest = EDGE_SUCC (e->dest, 0)->dest;
! 	   bb_ann (dest)->forwardable;
! 	   last = EDGE_SUCC (dest, 0),
! 	     dest = EDGE_SUCC (dest, 0)->dest)
! 	bb_ann (dest)->forwardable = 0;
! 
!       /* Reset the forwardable marks to 1.  */
!       for (tmp = e->dest;
! 	   tmp != dest;
! 	   tmp = EDGE_SUCC (tmp, 0)->dest)
! 	bb_ann (tmp)->forwardable = 1;
  
!       if (dest == e->dest)
  	{
! 	  ei_next (&ei);
! 	  continue;
  	}
! 
!       old = find_edge (bb, dest);
!       if (old)
  	{
! 	  /* If there already is an edge, check whether the values in
! 	     phi nodes differ.  */
! 	  if (!phi_alternatives_equal (dest, last, old))
! 	    {
! 	      /* The previous block is forwarder.  Redirect our jump
! 		 to that target instead since we know it has no PHI
! 		 nodes that will need updating.  */
! 	      dest = last->src;
! 
! 	      /* That might mean that no forwarding at all is
! 		 possible.  */
! 	      if (dest == e->dest)
! 		{
! 		  ei_next (&ei);
! 		  continue;
! 		}
! 
! 	      old = find_edge (bb, dest);
! 	    }
  	}
  
!       /* Perform the redirection.  */
!       retval = true;
!       count = e->count;
!       freq = EDGE_FREQUENCY (e);
!       old_dest = e->dest;
!       e = redirect_edge_and_branch (e, dest);
! 
!       /* Update the profile.  */
!       if (profile_status != PROFILE_ABSENT)
! 	for (curr = old_dest;
! 	     curr != dest;
! 	     curr = EDGE_SUCC (curr, 0)->dest)
! 	  {
! 	    curr->frequency -= freq;
! 	    if (curr->frequency < 0)
! 	      curr->frequency = 0;
! 	    curr->count -= count;
! 	    if (curr->count < 0)
! 	      curr->count = 0;
! 	    EDGE_SUCC (curr, 0)->count -= count;
! 	    if (EDGE_SUCC (curr, 0)->count < 0)
! 	      EDGE_SUCC (curr, 0)->count = 0;
! 	  }
  
!       if (!old)
  	{
! 	  /* Update PHI nodes.  We know that the new argument should
! 	     have the same value as the argument associated with LAST.
! 	     Otherwise we would have changed our target block
! 	     above.  */
! 	  for (phi = phi_nodes (dest); phi; phi = PHI_CHAIN (phi))
  	    {
! 	      arg = phi_arg_from_edge (phi, last);
! 	      gcc_assert (arg >= 0);
! 	      add_phi_arg (&phi, PHI_ARG_DEF (phi, arg), e);
  	    }
  	}
  
!       /* Remove the unreachable blocks (observe that if all blocks
! 	 were reachable before, only those in the path we threaded
! 	 over and did not have any predecessor outside of the path
! 	 become unreachable).  */
!       for (; old_dest != dest; old_dest = tmp)
  	{
! 	  tmp = EDGE_SUCC (old_dest, 0)->dest;
! 
! 	  if (EDGE_COUNT (old_dest->preds) > 0)
! 	    break;
! 
! 	  delete_basic_block (old_dest);
  	}
  
!       /* Update the dominators.  */
!       if (dom_info_available_p (CDI_DOMINATORS))
  	{
! 	  /* If the dominator of the destination was in the
! 	     path, set its dominator to the start of the
! 	     redirected edge.  */
! 	  if (get_immediate_dominator (CDI_DOMINATORS, old_dest) == NULL)
! 	    set_immediate_dominator (CDI_DOMINATORS, old_dest, bb);
! 
! 	  /* Now proceed like if we forwarded just over one edge at a
! 	     time.  Algorithm for forwarding edge S --> A over
! 	     edge A --> B then is
! 
! 	     if (idom (B) == A
! 	         && !dominated_by (S, B))
! 	       idom (B) = idom (A);
! 	     recount_idom (A);  */
! 
! 	  for (; old_dest != dest; old_dest = tmp)
! 	    {
! 	      basic_block dom;
! 
! 	      tmp = EDGE_SUCC (old_dest, 0)->dest;
  
! 	      if (get_immediate_dominator (CDI_DOMINATORS, tmp) == old_dest
! 		  && !dominated_by_p (CDI_DOMINATORS, bb, tmp))
! 		{
! 		  dom = get_immediate_dominator (CDI_DOMINATORS, old_dest);
! 		  set_immediate_dominator (CDI_DOMINATORS, tmp, dom);
! 		}
  
! 	      dom = recount_dominator (CDI_DOMINATORS, old_dest);
! 	      set_immediate_dominator (CDI_DOMINATORS, old_dest, dom);
! 	    }
  	}
      }
  
!   return retval;
! }
  
  
! /* Thread jumps over empty statements.
  
!    This code should _not_ thread over obviously equivalent conditions
!    as that requires nontrivial updates to the SSA graph.
  
!    As a precondition, we require that all basic blocks be reachable.
!    That is, there should be no opportunities left for
!    delete_unreachable_blocks.  */
  
  static bool
! thread_jumps (void)
  {
    basic_block bb;
!   bool retval = false;
!   basic_block *worklist = xmalloc (sizeof (basic_block) * last_basic_block);
    basic_block *current = worklist;
  
    FOR_EACH_BB (bb)
      {
!       bb_ann (bb)->forwardable = tree_forwarder_block_p (bb);
!       bb->flags &= ~BB_VISITED;
      }
  
!   /* We pretend to have ENTRY_BLOCK_PTR in WORKLIST.  This way,
!      ENTRY_BLOCK_PTR will never be entered into WORKLIST.  */
!   ENTRY_BLOCK_PTR->flags |= BB_VISITED;
! 
!   /* Initialize WORKLIST by putting non-forwarder blocks that
!      immediately precede forwarder blocks because those are the ones
!      that we know we can thread jumps from.  We use BB_VISITED to
!      indicate whether a given basic block is in WORKLIST or not,
!      thereby avoiding duplicates in WORKLIST.  */
!   FOR_EACH_BB (bb)
!     {
!       edge_iterator ei;
!       edge e;
! 
!       /* We are not interested in finding non-forwarder blocks
! 	 directly.  We want to find non-forwarder blocks as
! 	 predecessors of a forwarder block.  */
!       if (!bb_ann (bb)->forwardable)
! 	continue;
! 
!       /* Now we know BB is a forwarder block.  Visit each of its
! 	 incoming edges and add to WORKLIST all non-forwarder blocks
! 	 among BB's predecessors.  */
!       FOR_EACH_EDGE (e, ei, bb->preds)
! 	{
! 	  /* We don't want to put a duplicate into WORKLIST.  */
! 	  if ((e->src->flags & BB_VISITED) == 0
! 	      /* We are not interested in threading jumps from a forwarder
! 		 block.  */
! 	      && !bb_ann (e->src)->forwardable)
! 	    {
! 	      e->src->flags |= BB_VISITED;
! 	      *current++ = e->src;
! 	    }
! 	}
!     }
! 
!   /* Now let's drain WORKLIST.  */
!   while (worklist != current)
      {
        bb = *--current;
! 
!       /* BB is no longer in WORKLIST, so clear BB_VISITED.  */
!       bb->flags &= ~BB_VISITED;
! 
!       if (thread_jumps_from_bb (bb))
! 	{
! 	  retval = true;
! 
! 	  if (tree_forwarder_block_p (bb))
! 	    {
! 	      edge_iterator ej;
! 	      edge f;
! 
! 	      bb_ann (bb)->forwardable = true;
! 
! 	      /* Attempts to thread through BB may have been blocked
! 		 because BB was not a forwarder block before.  Now
! 		 that BB is a forwarder block, we should revisit BB's
! 		 predecessors.  */
! 	      FOR_EACH_EDGE (f, ej, bb->preds)
! 		{
! 		  /* We don't want to put a duplicate into WORKLIST.  */
! 		  if ((f->src->flags & BB_VISITED) == 0
! 		      /* We are not interested in threading jumps from a
! 			 forwarder block.  */
! 		      && !bb_ann (f->src)->forwardable)
! 		    {
! 		      f->src->flags |= BB_VISITED;
! 		      *current++ = f->src;
! 		    }
! 		}
! 	    }
! 	}
      }
  
-   ENTRY_BLOCK_PTR->flags &= ~BB_VISITED;
- 
    free (worklist);
! 
!   return retval;
  }
  
- 
  /* Return a non-special label in the head of basic block BLOCK.
     Create one if it doesn't exist.  */
  
--- 3902,4092 ----
    return true;
  }
  
! /* Removes forwarder block BB.  Returns false if this failed.  If new forwarder
!    block is created due to redirection of edges, it is stored to worklist.  */
  
  static bool
! remove_forwarder_block (basic_block bb, basic_block **worklist)
  {
+   edge succ = EDGE_SUCC (bb, 0), e, s;
+   basic_block dest = succ->dest, dom, dombb, domdest;
+   tree label;
+   tree phi, def, sdef, *defs;
+   unsigned i, n;
    edge_iterator ei;
!   block_stmt_iterator bsi, bsi_to;
!   bool seen_abnormal_edge = false, dest_abnormal_edge = false;
  
!   /* We check for infinite loops already in tree_forwarder_block_p.  However
!      it may happen that the infinite loop is created afterwards due to
!      removal of forwarders.  */
!   if (dest == bb)
!     return false;
  
!   /* If the destination block consists of an nonlocal label, do not merge
!      it.  */
!   label = first_stmt (bb);
!   if (label
!       && TREE_CODE (label) == LABEL_EXPR
!       && DECL_NONLOCAL (LABEL_EXPR_LABEL (label)))
!     return false;
  
!   FOR_EACH_EDGE (e, ei, bb->preds)
!     {
!       if (e->flags & EDGE_ABNORMAL)
  	{
! 	  seen_abnormal_edge = true;
! 	  break;
  	}
!     }
!   FOR_EACH_EDGE (e, ei, dest->preds)
!     {
!       if (e->flags & EDGE_ABNORMAL)
  	{
! 	  dest_abnormal_edge = true;
! 	  break;
  	}
+     }
  
!   /* If there is an abnormal edge to basic block BB, but no
!      into dest, problems might occur during removal of the phi node
!      at out of ssa due to overlapping life ranges of registers.
! 
!      If there is an abnormal edge in DEST, the problems would occur
!      anyway since cleanup_dead_labels would then merge the labels for two
!      different eh regions, and rest of exception handling code does not
!      like it.
!      
!      So if there is an abnormal edge to BB, proceed only if there is no
!      abnormal edge to DEST and there are no phi nodes in DEST.  */
!   if (seen_abnormal_edge
!       && (dest_abnormal_edge
! 	  || phi_nodes (dest) != NULL_TREE))
!     return false;
! 
!   /* If there are phi nodes in DEST, and some of the blocks that are
!      predecessors of BB are also predecessors of DEST, check that the
!      phi node arguments match.  Also record the list of arguments that
!      come from SUCC edge, to avoid calling PHI_ARG_DEF_FROM_EDGE
!      too much.  */
!   n = 0;
!   for (phi = phi_nodes (dest); phi; phi = PHI_CHAIN (phi))
!     n++;
!   defs = xmalloc (n * sizeof (tree));
  
!   for (phi = phi_nodes (dest), i = 0; phi; phi = PHI_CHAIN (phi), i++)
!     {
!       def = PHI_ARG_DEF_FROM_EDGE (phi, succ);
!       defs[i] = def;
!   
!       FOR_EACH_EDGE (e, ei, bb->preds)
  	{
! 	  s = find_edge (e->src, dest);
! 	  if (!s)
! 	    continue;
! 	  sdef = PHI_ARG_DEF_FROM_EDGE (phi, s);
! 	  if (!operand_equal_p (def, sdef, 0))
  	    {
! 	      free (defs);
! 	      return false;
  	    }
  	}
+     }
  
!   /* Redirect the edges.  */
!   for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); )
!     {
!       if (e->flags & EDGE_ABNORMAL)
  	{
! 	  /* If there is an abnormal edge, redirect it anyway, and move the
! 	     labels to the new block to make it legal.  */
! 	  s = redirect_edge_succ_nodup (e, dest);
  	}
+       else
+ 	s = redirect_edge_and_branch (e, dest);
  
!       if (s == e)
  	{
! 	  /* Create arguments for the phi nodes, since the edge was not
! 	     here before.  */
! 	  for (phi = phi_nodes (dest), i = 0; phi; phi = PHI_CHAIN (phi), i++)
! 	    add_phi_arg (&phi, defs[i], s);
! 	}
!       else
! 	{
! 	  /* The source basic block might become a forwarder.  We know that it
! 	     was not forwarder before, since it used to have at least two
! 	     outgoing edges, so we may just add it to worklist.  */
! 	  if (tree_forwarder_block_p (s->src))
! 	    *(*worklist)++ = s->src;
! 	}
!     }
!   free (defs);
  
!   if (seen_abnormal_edge)
!     {
!       /* Move the labels to the new block, so that the redirection of the
! 	 abnormal edges works.  */
  
!       bsi_to = bsi_start (dest);
!       for (bsi = bsi_start (bb); !bsi_end_p (bsi); )
! 	{
! 	  label = bsi_stmt (bsi);
! 	  gcc_assert (TREE_CODE (label) == LABEL_EXPR);
! 	  bsi_remove (&bsi);
! 	  bsi_insert_before (&bsi_to, label, BSI_CONTINUE_LINKING);
  	}
      }
  
!   /* Update the dominators.  */
!   if (dom_info_available_p (CDI_DOMINATORS))
!     {
!       dombb = get_immediate_dominator (CDI_DOMINATORS, bb);
!       domdest = get_immediate_dominator (CDI_DOMINATORS, dest);
!       if (domdest == bb)
! 	{
! 	  /* Shortcut to avoid calling (relatively expensive)
! 	     nearest_common_dominator unless necessary.  */
! 	  dom = dombb;
! 	}
!       else
! 	dom = nearest_common_dominator (CDI_DOMINATORS, domdest, dombb);
  
+       set_immediate_dominator (CDI_DOMINATORS, dest, dom);
+     }
  
!   /* And kill the forwarder block.  */
!   delete_basic_block (bb);
  
!   return true;
! }
  
! /* Removes forwarder blocks.  */
  
  static bool
! cleanup_forwarder_blocks (void)
  {
    basic_block bb;
!   bool changed = false;
!   basic_block *worklist = xmalloc (sizeof (basic_block) * n_basic_blocks);
    basic_block *current = worklist;
  
    FOR_EACH_BB (bb)
      {
!       if (tree_forwarder_block_p (bb))
! 	*current++ = bb;
      }
  
!   while (current != worklist)
      {
        bb = *--current;
!       changed |= remove_forwarder_block (bb, &current);
      }
  
    free (worklist);
!   return changed;
  }
  
  /* Return a non-special label in the head of basic block BLOCK.
     Create one if it doesn't exist.  */
  



More information about the Gcc-patches mailing list