This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Quadratic bottleneck in SSA propagation engine


Jan Hubicka wrote:
> Hi,
> the propagation engine on compile/20001226-1.c is behaving quadratically so VRP
> consume over 80% of overall compilation time.  The testcase consists of huge
> decision tree and one PHI node merging all the values computed in tree as
> return value.  The problem is that each time leaf of decision tree is processed
> by VRP, the huge PHI node is re-processed.  On the beggining of compilation
> (consuming 80% of overall compilation time) most of time is spent by skipping
> non-executable edges, later all edges are executable and very ineffecient
> merging code is used (I have patch for that too).
>
> I can think of several ways to deal with this:
>
>    1) Annotate each PHI with bitmap of edges to be considered.  Clearly only
>       the newly added edges, or edges representing arguments that was updated
>       needs to be considered.
>
>       There are few problems here
> 	- when PHI argument has changed, it is impossible to effectivly lookup
> 	  appropriate edge without introducing some hashtables for each
> 	  PHI too.
> 	- we need bitmaps for each PHI (since PHIs in given BB are not always
> 	  considered in block - PHI is either scheduled because of new edge
> 	  or because it's changed argument)
>    2) Reorder the queues of statements, so those expensive beasts are dealt
>       with later and hopefully more precise information is done in one run.
>
> I implemented 2) by simply adding secondary queues for "expensive" PHIs with
> over 4 incomming edges.
>
> This simple trick reduce compilation time from 35 seconds to 8 seconds overall
> (after optimizing the merge code that brings 20% speedup by itself).  One
> problem here is that for some reason the BB is re-scanned by the code each time
> new incomming edge becomes live.  It seems to me that only the PHIs needs to be
> updated since the statements will be scheduled by updating via SSA edges if
> PHIs has changed.
>
> Does this appear to make sense?
>
>   
You need to scan a basic block only once.  If the output of a phi
function changes, then all of the stmts that use the value that comes
out of the phi function need to be added to the worklist, but the block
itself does not need to be rescanned.  I believe that this is a bug.



> Bootstrapped/regtested i686-linux.  I can get some more performance data
> (gerald's testcase, tramp3d, combine, insn-attrtab) if this seems appropriate
> for mainline.
>
> Honza
>
> 	* tree-ssa-propagate.c (expensive_interesting_ssa_edges,
> 	expensive_varying_ssa_edges): New variables.
> 	(add_ssa_edge): Use expensive queues.
> 	(add_control_edge): When block was processed, add only the PHI nodes.
> 	(ssa_prop_init, ssa_prop_fini): Deal with new lists.
> 	(ssa_propagate): Process new lists.
> Index: tree-ssa-propagate.c
> ===================================================================
> *** tree-ssa-propagate.c	(revision 116908)
> --- tree-ssa-propagate.c	(working copy)
> *************** static sbitmap bb_in_list;
> *** 143,148 ****
> --- 143,149 ----
>      web.  For each D-U edge, we store the target statement or PHI node
>      U.  */
>   static GTY(()) VEC(tree,gc) *interesting_ssa_edges;
> + static GTY(()) VEC(tree,gc) *expensive_interesting_ssa_edges;
>   
>   /* Identical to INTERESTING_SSA_EDGES.  For performance reasons, the
>      list of SSA edges is split into two.  One contains all SSA edges
> *************** static GTY(()) VEC(tree,gc) *interesting
> *** 159,164 ****
> --- 160,166 ----
>      situations where lattice values move from
>      UNDEFINED->INTERESTING->VARYING instead of UNDEFINED->VARYING.  */
>   static GTY(()) VEC(tree,gc) *varying_ssa_edges;
> + static GTY(()) VEC(tree,gc) *expensive_varying_ssa_edges;
>   
>   
>   /* Return true if the block worklist empty.  */
> *************** add_ssa_edge (tree var, bool is_varying)
> *** 246,256 ****
>         if (!DONT_SIMULATE_AGAIN (use_stmt)
>   	  && !STMT_IN_SSA_EDGE_WORKLIST (use_stmt))
>   	{
>   	  STMT_IN_SSA_EDGE_WORKLIST (use_stmt) = 1;
>   	  if (is_varying)
> ! 	    VEC_safe_push (tree, gc, varying_ssa_edges, use_stmt);
>   	  else
> ! 	    VEC_safe_push (tree, gc, interesting_ssa_edges, use_stmt);
>   	}
>       }
>   }
> --- 248,276 ----
>         if (!DONT_SIMULATE_AGAIN (use_stmt)
>   	  && !STMT_IN_SSA_EDGE_WORKLIST (use_stmt))
>   	{
> + 	  bool expensive = false;
> + 
> + 	  /* PHI nodes with large amount of incomming edges are expensive to
> + 	     process.  We enqueue them separately and hope that large number
> + 	     of incomming edges will be updated before PHI node processed.  */
> + 	  if (TREE_CODE (use_stmt) == PHI_NODE
> + 	      && PHI_NUM_ARGS (use_stmt) > 4)
> + 	    expensive = true;
>   	  STMT_IN_SSA_EDGE_WORKLIST (use_stmt) = 1;
>   	  if (is_varying)
> ! 	    {
> ! 	      if (expensive)
> ! 	        VEC_safe_push (tree, gc, expensive_varying_ssa_edges, use_stmt);
> ! 	      else
> ! 	        VEC_safe_push (tree, gc, varying_ssa_edges, use_stmt);
> ! 	    }
>   	  else
> ! 	    {
> ! 	      if (expensive)
> ! 	        VEC_safe_push (tree, gc, expensive_interesting_ssa_edges, use_stmt);
> ! 	      else
> ! 	        VEC_safe_push (tree, gc, interesting_ssa_edges, use_stmt);
> ! 	    }
>   	}
>       }
>   }
> *************** add_control_edge (edge e)
> *** 275,281 ****
>     if (TEST_BIT (bb_in_list, bb->index))
>       return;
>   
> !   cfg_blocks_add (bb);
>   
>     if (dump_file && (dump_flags & TDF_DETAILS))
>       fprintf (dump_file, "Adding Destination of edge (%d -> %d) to worklist\n\n",
> --- 295,316 ----
>     if (TEST_BIT (bb_in_list, bb->index))
>       return;
>   
> !   /* Only PHI nodes needs updating if the BB was reachable previously.  */
> !   if (TEST_BIT (executable_blocks, bb->index))
> !     {
> !       tree phi;
> !       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
> ! 	if (!STMT_IN_SSA_EDGE_WORKLIST (phi))
> ! 	  { 
> ! 	    STMT_IN_SSA_EDGE_WORKLIST (phi) = true;
> ! 	    if (PHI_NUM_ARGS (phi) > 4)
> ! 	      VEC_safe_push (tree, gc, expensive_varying_ssa_edges, phi);
> ! 	    else
> ! 	      VEC_safe_push (tree, gc, varying_ssa_edges, phi);
> ! 	  }
> !     }
> !   else
> !     cfg_blocks_add (bb);
>   
>     if (dump_file && (dump_flags & TDF_DETAILS))
>       fprintf (dump_file, "Adding Destination of edge (%d -> %d) to worklist\n\n",
> *************** ssa_prop_init (void)
> *** 468,473 ****
> --- 503,510 ----
>     /* Worklists of SSA edges.  */
>     interesting_ssa_edges = VEC_alloc (tree, gc, 20);
>     varying_ssa_edges = VEC_alloc (tree, gc, 20);
> +   expensive_interesting_ssa_edges = VEC_alloc (tree, gc, 20);
> +   expensive_varying_ssa_edges = VEC_alloc (tree, gc, 20);
>   
>     executable_blocks = sbitmap_alloc (last_basic_block);
>     sbitmap_zero (executable_blocks);
> *************** ssa_prop_fini (void)
> *** 513,518 ****
> --- 550,557 ----
>   {
>     VEC_free (tree, gc, interesting_ssa_edges);
>     VEC_free (tree, gc, varying_ssa_edges);
> +   VEC_free (tree, gc, expensive_interesting_ssa_edges);
> +   VEC_free (tree, gc, expensive_varying_ssa_edges);
>     VEC_free (basic_block, heap, cfg_blocks);
>     cfg_blocks = NULL;
>     sbitmap_free (bb_in_list);
> *************** ssa_propagate (ssa_prop_visit_stmt_fn vi
> *** 667,673 ****
>     /* Iterate until the worklists are empty.  */
>     while (!cfg_blocks_empty_p () 
>   	 || VEC_length (tree, interesting_ssa_edges) > 0
> ! 	 || VEC_length (tree, varying_ssa_edges) > 0)
>       {
>         if (!cfg_blocks_empty_p ())
>   	{
> --- 706,714 ----
>     /* Iterate until the worklists are empty.  */
>     while (!cfg_blocks_empty_p () 
>   	 || VEC_length (tree, interesting_ssa_edges) > 0
> ! 	 || VEC_length (tree, expensive_interesting_ssa_edges) > 0
> ! 	 || VEC_length (tree, varying_ssa_edges) > 0
> ! 	 || VEC_length (tree, expensive_varying_ssa_edges) > 0)
>       {
>         if (!cfg_blocks_empty_p ())
>   	{
> *************** ssa_propagate (ssa_prop_visit_stmt_fn vi
> *** 678,687 ****
>   
>         /* In order to move things to varying as quickly as
>   	 possible,process the VARYING_SSA_EDGES worklist first.  */
> !       process_ssa_edge_worklist (&varying_ssa_edges);
>   
>         /* Now process the INTERESTING_SSA_EDGES worklist.  */
> !       process_ssa_edge_worklist (&interesting_ssa_edges);
>       }
>   
>     ssa_prop_fini ();
> --- 719,736 ----
>   
>         /* In order to move things to varying as quickly as
>   	 possible,process the VARYING_SSA_EDGES worklist first.  */
> !       do {
> !         process_ssa_edge_worklist (&varying_ssa_edges);
> ! 	if (cfg_blocks_empty_p ())
> !           process_ssa_edge_worklist (&expensive_varying_ssa_edges);
> !       } while (VEC_length (tree, varying_ssa_edges) > 0);
>   
>         /* Now process the INTERESTING_SSA_EDGES worklist.  */
> !       do {
> !         process_ssa_edge_worklist (&interesting_ssa_edges);
> ! 	if (cfg_blocks_empty_p ())
> !           process_ssa_edge_worklist (&expensive_interesting_ssa_edges);
> !       } while (VEC_length (tree, interesting_ssa_edges) > 0);
>       }
>   
>     ssa_prop_fini ();
>   


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]