This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] more PR33928: DIY dataflow in fwprop, to enable it at -O1 and speed it up at -O2


This patch modifies fwprop to build its own datastructure for
use-to-single-def chains instead of using fullblown UD chains.  This is
not hard to do at all with a little refactoring of df-problems.c to
allow simulating RD just like it's done with LR.

Quoting Steven's comment from the PR, "alternatively we could re-work
fwprop to work on regions and use the partial-CFG dataflow stuff,
similar to what the RTL loop optimizers (like loop-invariant) do".
Summarizing advantages and disadvantages we have:

this patch
----------

    + easy, and can easily be reverted

    + no code difference at -O2 before and after the patch (not tested
      except on the PR testcase), giving more confidence in backporting

    - less extensible to cases where we want to forward propagate
      reaching definitions if they are all of a similar form (Bernd
      had a patch to form widening multiplications in fwprop)

region approach
---------------

    + may save further time in RD solution

    - could give different kinds of pessimization, the semi-global
      approach of fwprop seems to work well from tests made on
      well-known benchmarks who shalt not be named.

    - I don't think anyone is going to work on it soon :-)

Bootstrapped/regtested i686-pc-linux-gnu, checked that the time spent in
fwprop even for devilish PR26854 testcases is very small.  Ok for trunk
and after a while 4.4?

Paolo
2009-05-08  Paolo Bonzini  <bonzini@gnu.org>

	PR rtl-optimization/33928
	* fwprop.c (use_def_ref, get_def_for_use, bitmap_only_bit_bitween,
	process_uses, build_single_def_use_links): New.
	(update_df): Update use_def_ref.
	(forward_propagate_into): Use get_def_for_use instead of use-def
	chains.
	(fwprop_init): Call build_single_def_use_links and let it initialize
	dataflow.
	(fwprop_done): Free use_def_ref.
	(fwprop_addr): Eliminate duplicate call to df_set_flags.
	* df-problems.c (df_rd_simulate_artificial_defs_at_top, 
	df_rd_simulate_one_insn): New.
	(df_rd_bb_local_compute_process_def): Update head comment.
	(df_chain_create_bb): Use the new RD simulation functions.
	* df.h (df_rd_simulate_artificial_defs_at_top, 
	df_rd_simulate_one_insn): New.
	* opts.c (decode_options): Enable fwprop at -O1.
	* doc/invoke.texi (-fforward-propagate): Document this.

Index: fwprop.c
===================================================================
--- fwprop.c	(revision 147269)
+++ fwprop.c	(working copy)
@@ -105,6 +105,92 @@ along with GCC; see the file COPYING3.  
 
 static int num_changes;
 
+DEF_VEC_P(df_ref);
+DEF_VEC_ALLOC_P(df_ref,heap);
+VEC(df_ref,heap) *use_def_ref;
+
+static inline df_ref
+get_def_for_use (df_ref use)
+{
+  return VEC_index (df_ref, use_def_ref, DF_REF_ID (use));
+}
+
+static inline int
+bitmap_only_bit_between (const_bitmap b, unsigned first, unsigned last)
+{
+  bitmap_iterator bi;
+  unsigned bit, bit2;
+
+  if (last < first)
+    return -1;
+
+  bmp_iter_set_init (&bi, b, first, &bit);
+  if (bmp_iter_set (&bi, &bit) && bit <= last)
+    {
+      bit2 = bit;
+      bmp_iter_next (&bi, &bit2);
+      if (!bmp_iter_set (&bi, &bit2) || bit2 > last)
+        return bit;
+    }
+  return -1;
+}
+
+static void
+process_uses (bitmap local_rd, df_ref *use_rec, int top_flag)
+{
+  df_ref use;
+  while ((use = *use_rec++) != NULL)
+    if (top_flag == (DF_REF_FLAGS (use) & DF_REF_AT_TOP))
+      {
+	unsigned int uregno = DF_REF_REGNO (use);
+	unsigned int first = DF_DEFS_BEGIN (uregno);
+	unsigned int last = first + DF_DEFS_COUNT (uregno) - 1;
+	int defno = bitmap_only_bit_between (local_rd, first, last);
+	df_ref def = (defno == -1) ? NULL : DF_DEFS_GET (defno);
+
+	VEC_replace (df_ref, use_def_ref, DF_REF_ID (use), def);
+      }
+}
+
+static void
+build_single_def_use_links (void)
+{
+  basic_block bb;
+  bitmap local_rd = BITMAP_ALLOC (NULL);
+
+  /* We use reaching definitions to compute our restricted use-def chains.  */
+  df_set_flags (DF_EQ_NOTES);
+  df_rd_add_problem ();
+  df_analyze ();
+  df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
+
+  use_def_ref = VEC_alloc (df_ref, heap, DF_USES_TABLE_SIZE ());
+  VEC_safe_grow (df_ref, heap, use_def_ref, DF_USES_TABLE_SIZE ());
+
+  FOR_EACH_BB (bb)
+    {
+      int bb_index = bb->index;
+      struct df_rd_bb_info *bb_info = df_rd_get_bb_info (bb_index);
+      rtx insn;
+
+      bitmap_copy (local_rd, bb_info->in);
+      process_uses (local_rd, df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
+
+      df_rd_simulate_artificial_defs_at_top (bb, local_rd);
+      FOR_BB_INSNS (bb, insn)
+        if (INSN_P (insn))
+          {
+            unsigned int uid = INSN_UID (insn);
+            process_uses (local_rd, DF_INSN_UID_USES (uid), 0);
+            process_uses (local_rd, DF_INSN_UID_EQ_USES (uid), 0);
+            df_rd_simulate_one_insn (bb, insn, local_rd);
+	  }
+
+      process_uses (local_rd, df_get_artificial_uses (bb_index), 0);
+    }
+
+  BITMAP_FREE (local_rd);
+}
 
 /* Do not try to replace constant addresses or addresses of local and
    argument slots.  These MEM expressions are made only once and inserted
@@ -716,7 +802,8 @@ update_df (rtx insn, rtx *loc, df_ref *u
 			       width, offset, mode);
 
-      /* Set up the use-def chain.  */
-      df_chain_copy (new_use, DF_REF_CHAIN (orig_use));
+      /* Set up the use-def link.  */
+      gcc_assert (DF_REF_ID (new_use) == (int) VEC_length (df_ref, use_def_ref));
+      VEC_safe_push (df_ref, heap, use_def_ref, get_def_for_use (orig_use));
       changed = true;
     }
   if (changed)
@@ -1035,7 +1122,6 @@ forward_propagate_and_simplify (df_ref u
 static void
 forward_propagate_into (df_ref use)
 {
-  struct df_link *defs;
   df_ref def;
   rtx def_insn, def_set, use_insn;
   rtx parent;
@@ -1046,11 +1132,9 @@ forward_propagate_into (df_ref use)
     return;
 
   /* Only consider uses that have a single definition.  */
-  defs = DF_REF_CHAIN (use);
-  if (!defs || defs->next)
+  def = get_def_for_use (use);
+  if (!def)
     return;
-
-  def = defs->ref;
   if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
     return;
   if (DF_REF_IS_ARTIFICIAL (def))
@@ -1096,12 +1180,7 @@ fwprop_init (void)
      insns (sadly) if we are not working in cfglayout mode.  */
   loop_optimizer_init (0);
 
-  /* Now set up the dataflow problem (we only want use-def chains) and
-     put the dataflow solver to work.  */
-  df_set_flags (DF_EQ_NOTES);
-  df_chain_add_problem (DF_UD_CHAIN);
-  df_analyze ();
-  df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
+  build_single_def_use_links ();
   df_set_flags (DF_DEFER_INSN_RESCAN);
 }
 
@@ -1110,6 +1189,7 @@ fwprop_done (void)
 {
   loop_optimizer_finalize ();
 
+  VEC_free (df_ref, heap, use_def_ref);
   free_dominance_info (CDI_DOMINATORS);
   cleanup_cfg (0);
   delete_trivially_dead_insns (get_insns (), max_reg_num ());
@@ -1187,8 +1267,6 @@ fwprop_addr (void)
 
   /* Go through all the uses.  update_df will create new ones at the
      end, and we'll go through them as well.  */
-  df_set_flags (DF_DEFER_INSN_RESCAN);
-
   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
     {
       df_ref use = DF_USES_GET (i);
Index: df-problems.c
===================================================================
--- df-problems.c	(revision 147269)
+++ df-problems.c	(working copy)
@@ -316,7 +316,61 @@ df_rd_alloc (bitmap all_blocks)
 }
 
 
-/* Process a list of DEFs for df_rd_bb_local_compute.  */
+/* Add the effect of the top artificial defs of BB to the reaching definitions
+   bitmap LOCAL_RD.  */
+
+void
+df_rd_simulate_artificial_defs_at_top (basic_block bb, bitmap local_rd)
+{
+  int bb_index = bb->index;
+  df_ref *def_rec;
+  for (def_rec = df_get_artificial_defs (bb_index); *def_rec; def_rec++)
+    {
+      df_ref def = *def_rec;
+      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
+	{
+	  unsigned int dregno = DF_REF_REGNO (def);
+	  if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
+	    bitmap_clear_range (local_rd, 
+				DF_DEFS_BEGIN (dregno), 
+				DF_DEFS_COUNT (dregno));
+	  bitmap_set_bit (local_rd, DF_REF_ID (def));
+	}
+    }
+}
+
+/* Add the effect of the defs of INSN to the reaching definitions bitmap
+   LOCAL_RD.  */
+
+void
+df_rd_simulate_one_insn (basic_block bb ATTRIBUTE_UNUSED, rtx insn,
+			 bitmap local_rd)
+{
+  unsigned uid = INSN_UID (insn);
+  df_ref *def_rec;
+
+  for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
+    {
+      df_ref def = *def_rec;
+      unsigned int dregno = DF_REF_REGNO (def);
+      if ((!(df->changeable_flags & DF_NO_HARD_REGS))
+          || (dregno >= FIRST_PSEUDO_REGISTER))
+        {
+          if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
+	    bitmap_clear_range (local_rd, 
+				DF_DEFS_BEGIN (dregno), 
+				DF_DEFS_COUNT (dregno));
+	  if (!(DF_REF_FLAGS (def) 
+		& (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER)))
+	    bitmap_set_bit (local_rd, DF_REF_ID (def));
+	}
+    }
+}
+
+/* Process a list of DEFs for df_rd_bb_local_compute.  This is a bit
+   more complicated than just simulating, because we must produce the
+   gen and kill sets and hence deal with the two possible representations
+   of kill sets.   */
 
 static void
 df_rd_bb_local_compute_process_def (struct df_rd_bb_info *bb_info, 
@@ -2076,7 +2130,6 @@ df_chain_create_bb (unsigned int bb_inde
   struct df_rd_bb_info *bb_info = df_rd_get_bb_info (bb_index);
   rtx insn;
   bitmap cpy = BITMAP_ALLOC (NULL);
-  df_ref *def_rec;
 
   bitmap_copy (cpy, bb_info->in);
   bitmap_set_bit (df_chain->out_of_date_transfer_functions, bb_index);
@@ -2095,57 +2148,23 @@ df_chain_create_bb (unsigned int bb_inde
 				    DF_REF_AT_TOP);
 #endif
 
-  for (def_rec = df_get_artificial_defs (bb_index); *def_rec; def_rec++)
-    {
-      df_ref def = *def_rec;
-      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
-	{
-	  unsigned int dregno = DF_REF_REGNO (def);
-	  if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
-	    bitmap_clear_range (cpy, 
-				DF_DEFS_BEGIN (dregno), 
-				DF_DEFS_COUNT (dregno));
-	  bitmap_set_bit (cpy, DF_REF_ID (def));
-	}
-    }
+  df_rd_simulate_artificial_defs_at_top (bb, cpy);
   
   /* Process the regular instructions next.  */
   FOR_BB_INSNS (bb, insn)
-    {
-      df_ref *def_rec;
-      unsigned int uid = INSN_UID (insn);
-
-      if (!INSN_P (insn))
-	continue;
-
-      /* Now scan the uses and link them up with the defs that remain
-	 in the cpy vector.  */
-      
-      df_chain_create_bb_process_use (cpy, DF_INSN_UID_USES (uid), 0);
-
-      if (df->changeable_flags & DF_EQ_NOTES)
-	df_chain_create_bb_process_use (cpy, DF_INSN_UID_EQ_USES (uid), 0);
+    if (INSN_P (insn))
+      {
+        unsigned int uid = INSN_UID (insn);
 
+        /* First scan the uses and link them up with the defs that remain
+	   in the cpy vector.  */
+        df_chain_create_bb_process_use (cpy, DF_INSN_UID_USES (uid), 0);
+        if (df->changeable_flags & DF_EQ_NOTES)
+	  df_chain_create_bb_process_use (cpy, DF_INSN_UID_EQ_USES (uid), 0);
 
-      /* Since we are going forwards, process the defs second.  This
-         pass only changes the bits in cpy.  */
-      for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
-	{
-	  df_ref def = *def_rec;
-	  unsigned int dregno = DF_REF_REGNO (def);
-	  if ((!(df->changeable_flags & DF_NO_HARD_REGS))
-	      || (dregno >= FIRST_PSEUDO_REGISTER))
-	    {
-	      if (!(DF_REF_FLAGS (def) & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
-		bitmap_clear_range (cpy, 
-				    DF_DEFS_BEGIN (dregno), 
-				    DF_DEFS_COUNT (dregno));
-	      if (!(DF_REF_FLAGS (def) 
-		    & (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER)))
-		bitmap_set_bit (cpy, DF_REF_ID (def));
-	    }
-	}
-    }
+        /* Since we are going forwards, process the defs second.  */
+        df_rd_simulate_one_insn (bb, insn, cpy);
+      }
 
   /* Create the chains for the artificial uses of the hard registers
      at the end of the block.  */
Index: df.h
===================================================================
--- df.h	(revision 147269)
+++ df.h	(working copy)
@@ -939,6 +939,8 @@ extern void df_grow_bb_info (struct data
 extern void df_chain_dump (struct df_link *, FILE *);
 extern void df_print_bb_index (basic_block bb, FILE *file);
 extern void df_rd_add_problem (void);
+extern void df_rd_simulate_artificial_defs_at_top (basic_block, bitmap);
+extern void df_rd_simulate_one_insn (basic_block, rtx, bitmap);
 extern void df_lr_add_problem (void);
 extern void df_lr_verify_transfer_functions (void);
 extern void df_live_verify_transfer_functions (void);
Index: opts.c
===================================================================
--- opts.c	(revision 147269)
+++ opts.c	(working copy)
@@ -848,6 +848,7 @@ decode_options (unsigned int argc, const
 #endif
   flag_guess_branch_prob = opt1;
   flag_cprop_registers = opt1;
+  flag_forward_propagate = opt1;
   flag_if_conversion = opt1;
   flag_if_conversion2 = opt1;
   flag_ipa_pure_const = opt1;
@@ -873,7 +874,6 @@ decode_options (unsigned int argc, const
   flag_thread_jumps = opt2;
   flag_crossjumping = opt2;
   flag_optimize_sibling_calls = opt2;
-  flag_forward_propagate = opt2;
   flag_cse_follow_jumps = opt2;
   flag_gcse = opt2;
   flag_expensive_optimizations = opt2;
Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi	(revision 147269)
+++ doc/invoke.texi	(working copy)
@@ -5554,8 +5554,8 @@ instructions and checks if the result ca
 is active, two passes are performed and the second is scheduled after
 loop unrolling.
 
-This option is enabled by default at optimization levels @option{-O2},
-@option{-O3}, @option{-Os}.
+This option is enabled by default at optimization levels @option{-O},
+@option{-O2}, @option{-O3}, @option{-Os}.
 
 @item -fomit-frame-pointer
 @opindex fomit-frame-pointer

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]