Optimize df_worklist_dataflow

Jan Hubicka hubicka@ucw.cz
Sun Jun 13 10:34:00 GMT 2010


Hi,
here is updated patch.
I noticed one more problem - df_worklist_propagate_forward sets bit for all blocks
to be recomputed into pending.  Some of them are however in worklist and thus
we process some blocks twice (and because we work in postorder it is most of them).

This is why the dataflow code collect so many profile samples for just walking
the control flow graph.  I fixed this by clearing the bit in pending list too.
This is not at all optimal, since setbit/clearbit is quite expensive.  We
should track the bit if the BB is already in one of worklists that I will play
with as followup.  The patch makes situation no-worse since it trades the new
clear_bit for the original clear_bit on worklist.

I think ideally we should set bit in worklist of all basic blocks whose index
is higher than index of current BB - think of function body consisting of
acyclic sequencence closed in loop.  First scan goes well as we process
everything in postorder.  Second scan however does not, we will process only
the first BB and then for each of them we will do swapping of worklist and
re-starting of scan that is slower than just walk of worklist.  This is bit
difficult to do as bitmap iterator does not allow changes in the bitmap it is
walking.

Well, since re-scanning is not that expensive, perhaps we can get around with
only the extra bit to avoid unnecesary bitmap traffic.

Current profile is as follows:
CPU: AMD64 family10, speed 800 MHz (estimated)
Counted CPU_CLK_UNHALTED events (Cycles outside of halt state) with a unit mask of 0x00 (No unit mask) count 750000
samples  %        symbol name
46017     4.1503  htab_find_slot_with_hash
14089     1.2707  bitmap_set_bit_1
13438     1.2120  df_note_compute
12353     1.1141  htab_traverse_noresize
11508     1.0379  df_worklist_dataflow
10687     0.9639  htab_find_with_hash
10395     0.9375  record_reg_classes.constprop.1
10368     0.9351  bitmap_clear_bit
9844      0.8878  ggc_internal_alloc_stat
8857      0.7988  walk_tree_1
8353      0.7534  bitmap_copy
7338      0.6618  bitmap_bit_p_1
7275      0.6561  bitmap_ior_into
7082      0.6387  constrain_operands
6887      0.6211  fast_dce

So worklist dataflow is almost twice as fast.

Bootstraped/regtested x86_64-linux, OK?

Honza
	
	* df-problems.c (df_rd_confluence_n, df_lr_confluence_n, df_live_confluence_n,
	df_byte_lr_confluence_n, df_md_confluence_n): Return true if something changed.
	* df.h (df_confluence_function_n): Return bool.
	* df-core.c (df_worklist_propagate_forward, df_worklist_propagate_backward):
	Track changes and ages.
	(df_worklist_dataflow_doublequeue): Use bitmap iterator for main walk;
	track ages.
	* dse.c (dse_confluence_n): Return always true.
Index: df-core.c
===================================================================
*** df-core.c	(revision 160663)
--- df-core.c	(working copy)
*************** struct rtl_opt_pass pass_df_finish =
*** 851,885 ****
     The general data flow analysis engine.
  ----------------------------------------------------------------------------*/
  
  
  /* Helper function for df_worklist_dataflow.
     Propagate the dataflow forward.
     Given a BB_INDEX, do the dataflow propagation
     and set bits on for successors in PENDING
!    if the out set of the dataflow has changed. */
  
! static void
  df_worklist_propagate_forward (struct dataflow *dataflow,
                                 unsigned bb_index,
                                 unsigned *bbindex_to_postorder,
                                 bitmap pending,
!                                sbitmap considered)
  {
    edge e;
    edge_iterator ei;
    basic_block bb = BASIC_BLOCK (bb_index);
  
    /*  Calculate <conf_op> of incoming edges.  */
    if (EDGE_COUNT (bb->preds) > 0)
      FOR_EACH_EDGE (e, ei, bb->preds)
        {
!         if (TEST_BIT (considered, e->src->index))
!           dataflow->problem->con_fun_n (e);
        }
    else if (dataflow->problem->con_fun_0)
      dataflow->problem->con_fun_0 (bb);
  
!   if (dataflow->problem->trans_fun (bb_index))
      {
        /* The out set of this block has changed.
           Propagate to the outgoing blocks.  */
--- 851,902 ----
     The general data flow analysis engine.
  ----------------------------------------------------------------------------*/
  
+ /* Return time BB when it was visited for last time.  */
+ #define BB_LAST_CHANGE_AGE(bb) ((ptrdiff_t)(bb)->aux)
  
  /* Helper function for df_worklist_dataflow.
     Propagate the dataflow forward.
     Given a BB_INDEX, do the dataflow propagation
     and set bits on for successors in PENDING
!    if the out set of the dataflow has changed.
  
!    AGE specify time when BB was visited last time.
!    AGE of 0 means we are visiting for first time and need to
!    compute transfer function to initialize datastructures.
!    Otherwise we re-do transfer function only if something change
!    while computing confluence functions.
!    We need to compute confluence only of basic block that are younger
!    then last visit of the BB.
! 
!    Return true if BB info has changed.  This is always the case
!    in the first visit.  */
! 
! static bool
  df_worklist_propagate_forward (struct dataflow *dataflow,
                                 unsigned bb_index,
                                 unsigned *bbindex_to_postorder,
                                 bitmap pending,
!                                sbitmap considered,
! 			       ptrdiff_t age)
  {
    edge e;
    edge_iterator ei;
    basic_block bb = BASIC_BLOCK (bb_index);
+   bool changed = !age;
  
    /*  Calculate <conf_op> of incoming edges.  */
    if (EDGE_COUNT (bb->preds) > 0)
      FOR_EACH_EDGE (e, ei, bb->preds)
        {
!         if (age <= BB_LAST_CHANGE_AGE (e->src)
! 	    && TEST_BIT (considered, e->src->index))
!           changed |= dataflow->problem->con_fun_n (e);
        }
    else if (dataflow->problem->con_fun_0)
      dataflow->problem->con_fun_0 (bb);
  
!   if (changed
!       && dataflow->problem->trans_fun (bb_index))
      {
        /* The out set of this block has changed.
           Propagate to the outgoing blocks.  */
*************** df_worklist_propagate_forward (struct da
*** 890,924 ****
            if (TEST_BIT (considered, ob_index))
              bitmap_set_bit (pending, bbindex_to_postorder[ob_index]);
          }
      }
  }
  
  
  /* Helper function for df_worklist_dataflow.
     Propagate the dataflow backward.  */
  
! static void
  df_worklist_propagate_backward (struct dataflow *dataflow,
                                  unsigned bb_index,
                                  unsigned *bbindex_to_postorder,
                                  bitmap pending,
!                                 sbitmap considered)
  {
    edge e;
    edge_iterator ei;
    basic_block bb = BASIC_BLOCK (bb_index);
  
    /*  Calculate <conf_op> of incoming edges.  */
    if (EDGE_COUNT (bb->succs) > 0)
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
!         if (TEST_BIT (considered, e->dest->index))
!           dataflow->problem->con_fun_n (e);
        }
    else if (dataflow->problem->con_fun_0)
      dataflow->problem->con_fun_0 (bb);
  
!   if (dataflow->problem->trans_fun (bb_index))
      {
        /* The out set of this block has changed.
           Propagate to the outgoing blocks.  */
--- 907,947 ----
            if (TEST_BIT (considered, ob_index))
              bitmap_set_bit (pending, bbindex_to_postorder[ob_index]);
          }
+       return true;
      }
+   return false;
  }
  
  
  /* Helper function for df_worklist_dataflow.
     Propagate the dataflow backward.  */
  
! static bool
  df_worklist_propagate_backward (struct dataflow *dataflow,
                                  unsigned bb_index,
                                  unsigned *bbindex_to_postorder,
                                  bitmap pending,
!                                 sbitmap considered,
! 			        ptrdiff_t age)
  {
    edge e;
    edge_iterator ei;
    basic_block bb = BASIC_BLOCK (bb_index);
+   bool changed = !age;
  
    /*  Calculate <conf_op> of incoming edges.  */
    if (EDGE_COUNT (bb->succs) > 0)
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
!         if (age <= BB_LAST_CHANGE_AGE (e->dest)
! 	    && TEST_BIT (considered, e->dest->index))
!           changed |= dataflow->problem->con_fun_n (e);
        }
    else if (dataflow->problem->con_fun_0)
      dataflow->problem->con_fun_0 (bb);
  
!   if (changed
!       && dataflow->problem->trans_fun (bb_index))
      {
        /* The out set of this block has changed.
           Propagate to the outgoing blocks.  */
*************** df_worklist_propagate_backward (struct d
*** 929,986 ****
            if (TEST_BIT (considered, ob_index))
              bitmap_set_bit (pending, bbindex_to_postorder[ob_index]);
          }
      }
  }
  
  
  
! /* This will free "pending". */
  
  static void
  df_worklist_dataflow_doublequeue (struct dataflow *dataflow,
  			  	  bitmap pending,
                                    sbitmap considered,
                                    int *blocks_in_postorder,
! 				  unsigned *bbindex_to_postorder)
  {
    enum df_flow_dir dir = dataflow->problem->dir;
    int dcount = 0;
    bitmap worklist = BITMAP_ALLOC (&df_bitmap_obstack);
  
    /* Double-queueing. Worklist is for the current iteration,
       and pending is for the next. */
    while (!bitmap_empty_p (pending))
      {
        /* Swap pending and worklist. */
        bitmap temp = worklist;
        worklist = pending;
        pending = temp;
  
!       do
  	{
- 	  int index;
  	  unsigned bb_index;
  	  dcount++;
  
! 	  index = bitmap_first_set_bit (worklist);
! 	  bitmap_clear_bit (worklist, index);
! 
  	  bb_index = blocks_in_postorder[index];
! 
  	  if (dir == DF_FORWARD)
! 	    df_worklist_propagate_forward (dataflow, bb_index,
! 					   bbindex_to_postorder,
! 					   pending, considered);
  	  else
! 	    df_worklist_propagate_backward (dataflow, bb_index,
! 					    bbindex_to_postorder,
! 					    pending, considered);
  	}
!       while (!bitmap_empty_p (worklist));
      }
  
    BITMAP_FREE (worklist);
    BITMAP_FREE (pending);
  
    /* Dump statistics. */
    if (dump_file)
--- 952,1044 ----
            if (TEST_BIT (considered, ob_index))
              bitmap_set_bit (pending, bbindex_to_postorder[ob_index]);
          }
+       return true;
      }
+   return false;
  }
  
+ /* Main dataflow solver loop.
  
+    DATAFLOW is problem we are solving, PENDING is worklist of basic blocks we
+    need to visit.
+    BLOCK_IN_POSTORDER is array of size N_BLOCKS specifying postorder in BBs and
+    BBINDEX_TO_POSTORDER is array mapping back BB->index to postorder possition.
+    PENDING will be freed.
  
!    The worklists are bitmaps indexed by postorder positions.  
! 
!    The function implements standard algorithm for dataflow solving with two
!    worklists (we are processing WORKLIST and storing new BBs to visit in
!    PENDING).
! 
!    As an optimization we maintain ages when BB was changed (stored in bb->aux)
!    and when it was last visited (stored in last_visit_age).  This avoids need
!    to re-do confluence function for edges to basic blocks whose source
!    did not change since destination was visited last time.  */
  
  static void
  df_worklist_dataflow_doublequeue (struct dataflow *dataflow,
  			  	  bitmap pending,
                                    sbitmap considered,
                                    int *blocks_in_postorder,
! 				  unsigned *bbindex_to_postorder,
! 				  int n_blocks)
  {
    enum df_flow_dir dir = dataflow->problem->dir;
    int dcount = 0;
    bitmap worklist = BITMAP_ALLOC (&df_bitmap_obstack);
+   int age = 0;
+   bool changed;
+   VEC(int, heap) *last_visit_age = NULL;
+   int prev_age;
+   basic_block bb;
+   int i;
+ 
+   VEC_safe_grow_cleared (int, heap, last_visit_age, n_blocks);
  
    /* Double-queueing. Worklist is for the current iteration,
       and pending is for the next. */
    while (!bitmap_empty_p (pending))
      {
+       bitmap_iterator bi;
+       unsigned int index;
+ 
        /* Swap pending and worklist. */
        bitmap temp = worklist;
        worklist = pending;
        pending = temp;
  
!       EXECUTE_IF_SET_IN_BITMAP (worklist, 0, index, bi)
  	{
  	  unsigned bb_index;
  	  dcount++;
  
! 	  bitmap_clear_bit (pending, index);
  	  bb_index = blocks_in_postorder[index];
! 	  bb = BASIC_BLOCK (bb_index);
! 	  prev_age = VEC_index (int, last_visit_age, index);
  	  if (dir == DF_FORWARD)
! 	    changed = df_worklist_propagate_forward (dataflow, bb_index,
! 						     bbindex_to_postorder,
! 						     pending, considered,
! 						     prev_age);
  	  else
! 	    changed = df_worklist_propagate_backward (dataflow, bb_index,
! 						      bbindex_to_postorder,
! 						      pending, considered,
! 						      prev_age);
! 	  VEC_replace (int, last_visit_age, index, ++age);
! 	  if (changed)
! 	    bb->aux = (void *)(ptrdiff_t)age;
  	}
!       bitmap_clear (worklist);
      }
+   for (i = 0; i < n_blocks; i++)
+     BASIC_BLOCK (blocks_in_postorder[i])->aux = NULL;
  
    BITMAP_FREE (worklist);
    BITMAP_FREE (pending);
+   VEC_free (int, heap, last_visit_age);
  
    /* Dump statistics. */
    if (dump_file)
*************** df_worklist_dataflow (struct dataflow *d
*** 1044,1051 ****
    /* Solve it.  */
    df_worklist_dataflow_doublequeue (dataflow, pending, considered,
  				    blocks_in_postorder,
! 				    bbindex_to_postorder);
! 
    sbitmap_free (considered);
    free (bbindex_to_postorder);
  }
--- 1102,1109 ----
    /* Solve it.  */
    df_worklist_dataflow_doublequeue (dataflow, pending, considered,
  				    blocks_in_postorder,
! 				    bbindex_to_postorder,
! 				    n_blocks);
    sbitmap_free (considered);
    free (bbindex_to_postorder);
  }
Index: dse.c
===================================================================
*** dse.c	(revision 160663)
--- dse.c	(working copy)
*************** dse_confluence_0 (basic_block bb)
*** 3396,3402 ****
     out set of the src of E.  If the various in or out sets are not
     there, that means they are all ones.  */
  
! static void
  dse_confluence_n (edge e)
  {
    bb_info_t src_info = bb_table[e->src->index];
--- 3396,3402 ----
     out set of the src of E.  If the various in or out sets are not
     there, that means they are all ones.  */
  
! static bool
  dse_confluence_n (edge e)
  {
    bb_info_t src_info = bb_table[e->src->index];
*************** dse_confluence_n (edge e)
*** 3412,3417 ****
--- 3412,3418 ----
  	  bitmap_copy (src_info->out, dest_info->in);
  	}
      }
+   return true;
  }
  
  
Index: df.h
===================================================================
*** df.h	(revision 160663)
--- df.h	(working copy)
*************** typedef void (*df_dataflow_function) (st
*** 222,231 ****
  /* Confluence operator for blocks with 0 out (or in) edges.  */
  typedef void (*df_confluence_function_0) (basic_block);
  
! /* Confluence operator for blocks with 1 or more out (or in) edges.  */
! typedef void (*df_confluence_function_n) (edge);
  
! /* Transfer function for blocks.  */
  typedef bool (*df_transfer_function) (int);
  
  /* Function to massage the information after the problem solving.  */
--- 222,233 ----
  /* Confluence operator for blocks with 0 out (or in) edges.  */
  typedef void (*df_confluence_function_0) (basic_block);
  
! /* Confluence operator for blocks with 1 or more out (or in) edges.
!    Return true if BB input data has changed.  */
! typedef bool (*df_confluence_function_n) (edge);
  
! /* Transfer function for blocks. 
!    Return true if BB output data has changed.  */
  typedef bool (*df_transfer_function) (int);
  
  /* Function to massage the information after the problem solving.  */
Index: df-problems.c
===================================================================
*** df-problems.c	(revision 160663)
--- df-problems.c	(working copy)
*************** df_rd_init_solution (bitmap all_blocks)
*** 479,492 ****
  
  /* In of target gets or of out of source.  */
  
! static void
  df_rd_confluence_n (edge e)
  {
    bitmap op1 = &df_rd_get_bb_info (e->dest->index)->in;
    bitmap op2 = &df_rd_get_bb_info (e->src->index)->out;
  
    if (e->flags & EDGE_FAKE)
!     return;
  
    if (e->flags & EDGE_EH)
      {
--- 479,493 ----
  
  /* In of target gets or of out of source.  */
  
! static bool
  df_rd_confluence_n (edge e)
  {
    bitmap op1 = &df_rd_get_bb_info (e->dest->index)->in;
    bitmap op2 = &df_rd_get_bb_info (e->src->index)->out;
+   bool changed = false;
  
    if (e->flags & EDGE_FAKE)
!     return false;
  
    if (e->flags & EDGE_EH)
      {
*************** df_rd_confluence_n (edge e)
*** 508,518 ****
   			      DF_DEFS_BEGIN (regno),
   			      DF_DEFS_COUNT (regno));
  	}
!       bitmap_ior_into (op1, &tmp);
        bitmap_clear (&tmp);
      }
    else
!     bitmap_ior_into (op1, op2);
  }
  
  
--- 509,520 ----
   			      DF_DEFS_BEGIN (regno),
   			      DF_DEFS_COUNT (regno));
  	}
!       changed |= bitmap_ior_into (op1, &tmp);
        bitmap_clear (&tmp);
+       return changed;
      }
    else
!     return bitmap_ior_into (op1, op2);
  }
  
  
*************** df_lr_confluence_0 (basic_block bb)
*** 966,986 ****
  
  /* Confluence function that ignores fake edges.  */
  
! static void
  df_lr_confluence_n (edge e)
  {
    bitmap op1 = &df_lr_get_bb_info (e->src->index)->out;
    bitmap op2 = &df_lr_get_bb_info (e->dest->index)->in;
  
    /* Call-clobbered registers die across exception and call edges.  */
    /* ??? Abnormal call edges ignored for the moment, as this gets
       confused by sibling call edges, which crashes reg-stack.  */
    if (e->flags & EDGE_EH)
!     bitmap_ior_and_compl_into (op1, op2, regs_invalidated_by_call_regset);
    else
!     bitmap_ior_into (op1, op2);
  
!   bitmap_ior_into (op1, &df->hardware_regs_used);
  }
  
  
--- 968,990 ----
  
  /* Confluence function that ignores fake edges.  */
  
! static bool
  df_lr_confluence_n (edge e)
  {
    bitmap op1 = &df_lr_get_bb_info (e->src->index)->out;
    bitmap op2 = &df_lr_get_bb_info (e->dest->index)->in;
+   bool changed = false;
  
    /* Call-clobbered registers die across exception and call edges.  */
    /* ??? Abnormal call edges ignored for the moment, as this gets
       confused by sibling call edges, which crashes reg-stack.  */
    if (e->flags & EDGE_EH)
!     changed = bitmap_ior_and_compl_into (op1, op2, regs_invalidated_by_call_regset);
    else
!     changed = bitmap_ior_into (op1, op2);
  
!   changed |= bitmap_ior_into (op1, &df->hardware_regs_used);
!   return changed;
  }
  
  
*************** df_live_init (bitmap all_blocks)
*** 1503,1518 ****
  
  /* Forward confluence function that ignores fake edges.  */
  
! static void
  df_live_confluence_n (edge e)
  {
    bitmap op1 = &df_live_get_bb_info (e->dest->index)->in;
    bitmap op2 = &df_live_get_bb_info (e->src->index)->out;
  
    if (e->flags & EDGE_FAKE)
!     return;
  
!   bitmap_ior_into (op1, op2);
  }
  
  
--- 1507,1522 ----
  
  /* Forward confluence function that ignores fake edges.  */
  
! static bool
  df_live_confluence_n (edge e)
  {
    bitmap op1 = &df_live_get_bb_info (e->dest->index)->in;
    bitmap op2 = &df_live_get_bb_info (e->src->index)->out;
  
    if (e->flags & EDGE_FAKE)
!     return false;
  
!   return bitmap_ior_into (op1, op2);
  }
  
  
*************** df_byte_lr_confluence_0 (basic_block bb)
*** 2710,2732 ****
  
  /* Confluence function that ignores fake edges.  */
  
! static void
  df_byte_lr_confluence_n (edge e)
  {
    struct df_byte_lr_problem_data *problem_data
      = (struct df_byte_lr_problem_data *)df_byte_lr->problem_data;
    bitmap op1 = &df_byte_lr_get_bb_info (e->src->index)->out;
    bitmap op2 = &df_byte_lr_get_bb_info (e->dest->index)->in;
  
    /* Call-clobbered registers die across exception and call edges.  */
    /* ??? Abnormal call edges ignored for the moment, as this gets
       confused by sibling call edges, which crashes reg-stack.  */
    if (e->flags & EDGE_EH)
!     bitmap_ior_and_compl_into (op1, op2, &problem_data->invalidated_by_call);
    else
!     bitmap_ior_into (op1, op2);
  
!   bitmap_ior_into (op1, &problem_data->hardware_regs_used);
  }
  
  
--- 2714,2739 ----
  
  /* Confluence function that ignores fake edges.  */
  
! static bool
  df_byte_lr_confluence_n (edge e)
  {
    struct df_byte_lr_problem_data *problem_data
      = (struct df_byte_lr_problem_data *)df_byte_lr->problem_data;
    bitmap op1 = &df_byte_lr_get_bb_info (e->src->index)->out;
    bitmap op2 = &df_byte_lr_get_bb_info (e->dest->index)->in;
+   bool changed = false;
  
    /* Call-clobbered registers die across exception and call edges.  */
    /* ??? Abnormal call edges ignored for the moment, as this gets
       confused by sibling call edges, which crashes reg-stack.  */
    if (e->flags & EDGE_EH)
!     changed = bitmap_ior_and_compl_into (op1, op2,
! 					 &problem_data->invalidated_by_call);
    else
!     changed = bitmap_ior_into (op1, op2);
  
!   changed |= bitmap_ior_into (op1, &problem_data->hardware_regs_used);
!   return changed;
  }
  
  
*************** df_md_confluence_0 (basic_block bb)
*** 4426,4444 ****
  
  /* In of target gets or of out of source.  */
  
! static void
  df_md_confluence_n (edge e)
  {
    bitmap op1 = &df_md_get_bb_info (e->dest->index)->in;
    bitmap op2 = &df_md_get_bb_info (e->src->index)->out;
  
    if (e->flags & EDGE_FAKE)
!     return;
  
    if (e->flags & EDGE_EH)
!     bitmap_ior_and_compl_into (op1, op2, regs_invalidated_by_call_regset);
    else
!     bitmap_ior_into (op1, op2);
  }
  
  /* Free all storage associated with the problem.  */
--- 4433,4452 ----
  
  /* In of target gets or of out of source.  */
  
! static bool
  df_md_confluence_n (edge e)
  {
    bitmap op1 = &df_md_get_bb_info (e->dest->index)->in;
    bitmap op2 = &df_md_get_bb_info (e->src->index)->out;
  
    if (e->flags & EDGE_FAKE)
!     return false;
  
    if (e->flags & EDGE_EH)
!     return bitmap_ior_and_compl_into (op1, op2,
! 				      regs_invalidated_by_call_regset);
    else
!     return bitmap_ior_into (op1, op2);
  }
  
  /* Free all storage associated with the problem.  */



More information about the Gcc-patches mailing list