This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[dataflow] PATCH to make small performance improvement and more ia64 fixes.


This patch removes a large number of calls to either find the postorder
numbering or delete unreachable blocks.  This saves about .5% in compile
time.

This patch also fixes more ia-64 problems. 

This has been bootstrapped on x86_64-unknown-linux-gnu.  The ia-64 is
now awaiting a merge with the mainline. They both fail bootstrap in the
same way.

Kenny


2006-08-04  Kenneth Zadeck <zadeck@naturalbridge.com>

    * tree-pass.h (pass_rtl_dse): Split into pass_rtl_dse1,
    pass_rtl_dse2, pass_rtl_dse3.
    * passes.c (init_optimization_passes): Ditto. 
    * timevar.def (TV_DSE): Split into TV_DSE1, TV_DSE2, and TV_DSE3.
    (TV_THREAD_PROLOGUE_AND_EPILOGUE): Made text shorter to improve
    readability.
    * df.core.c (df_init, df_finish1, df_analyze_problem, df_analyze):
    Made postorder and instance variable of df. 
    (df_finish1): Made tolerant of being passed NULL instance.
    (df_get_n_blocks, df_get_postorder): New functions.
    * cfganal (post_order_compute): Added delete_unreachable
    parameter and code to delete unreachable blocks.
    * local_alloc (rest_of_handle_local_alloc): Removed unnecessary
    call to delete_unreachable_blocks.
    * df.h (postorder, n_blocks): New instance variables.
    (df_get_n_blocks, df_get_postorder): New functions.
    * sched-rgn.c (extend_rgns): Added extra parameter to
    post_order_compute.
    * basic-block.h (post_order_compute): Ditto.
    * dce.c (fast_dce, init_rs_dflow): Now uses postorder and n_blocks
from df.
    (pass_rtl_dse): Split into pass_rtl_dse1,
    pass_rtl_dse2, pass_rtl_dse3.
    * sched-ebb.c (schedule-ebbs): Added return value.
    * haifa-sched.c (add_block): Made df parameter unused and fixed
    incorrect assert. 

Index: tree-pass.h
===================================================================
--- tree-pass.h	(revision 115812)
+++ tree-pass.h	(working copy)
@@ -341,7 +341,9 @@ extern struct tree_opt_pass pass_jump2;
 extern struct tree_opt_pass pass_cse;
 extern struct tree_opt_pass pass_fast_rtl_dce;
 extern struct tree_opt_pass pass_rtl_dce;
-extern struct tree_opt_pass pass_rtl_dse;
+extern struct tree_opt_pass pass_rtl_dse1;
+extern struct tree_opt_pass pass_rtl_dse2;
+extern struct tree_opt_pass pass_rtl_dse3;
 extern struct tree_opt_pass pass_gcse;
 extern struct tree_opt_pass pass_jump_bypass;
 extern struct tree_opt_pass pass_profiling;
Index: passes.c
===================================================================
--- passes.c	(revision 115812)
+++ passes.c	(working copy)
@@ -648,7 +648,7 @@ init_optimization_passes (void)
   NEXT_PASS (pass_loop2);
   NEXT_PASS (pass_web);
   NEXT_PASS (pass_cse2);
-  NEXT_PASS (pass_rtl_dse);
+  NEXT_PASS (pass_rtl_dse1);
   NEXT_PASS (pass_rtl_fwprop_addr);
   NEXT_PASS (pass_regclass_init);
   NEXT_PASS (pass_subregs_of_mode_init);
@@ -657,7 +657,7 @@ init_optimization_passes (void)
   NEXT_PASS (pass_initialize_subregs);
   NEXT_PASS (pass_no_new_pseudos);
   NEXT_PASS (pass_combine);
-  NEXT_PASS (pass_rtl_dse);
+  NEXT_PASS (pass_rtl_dse2);
   NEXT_PASS (pass_if_after_combine);
   NEXT_PASS (pass_fast_rtl_dce);
   NEXT_PASS (pass_partition_blocks);
@@ -679,7 +679,7 @@ init_optimization_passes (void)
   NEXT_PASS (pass_split_after_reload);
   NEXT_PASS (pass_branch_target_load_optimize1);
   NEXT_PASS (pass_thread_prologue_and_epilogue);
-  NEXT_PASS (pass_rtl_dse);
+  NEXT_PASS (pass_rtl_dse3);
   NEXT_PASS (pass_rtl_seqabstr);
   NEXT_PASS (pass_stack_adjustments);
   NEXT_PASS (pass_peephole2);
Index: timevar.def
===================================================================
--- timevar.def	(revision 115812)
+++ timevar.def	(working copy)
@@ -131,7 +131,9 @@ DEFTIMEVAR (TV_JUMP                  , "
 DEFTIMEVAR (TV_FWPROP                , "forward prop")
 DEFTIMEVAR (TV_CSE                   , "CSE")
 DEFTIMEVAR (TV_DCE                   , "dead code elimination")
-DEFTIMEVAR (TV_DSE                   , "dead store elimination")
+DEFTIMEVAR (TV_DSE1                  , "dead store elim1")
+DEFTIMEVAR (TV_DSE2                  , "dead store elim2")
+DEFTIMEVAR (TV_DSE3                  , "dead store elim3")
 DEFTIMEVAR (TV_LOOP                  , "loop analysis")
 DEFTIMEVAR (TV_GCSE                  , "global CSE")
 DEFTIMEVAR (TV_CPROP1                , "CPROP 1")
@@ -158,7 +160,7 @@ DEFTIMEVAR (TV_GLOBAL_ALLOC          , "
 DEFTIMEVAR (TV_RELOAD_CSE_REGS       , "reload CSE regs")
 DEFTIMEVAR (TV_SEQABSTR              , "sequence abstraction")
 DEFTIMEVAR (TV_GCSE_AFTER_RELOAD      , "load CSE after reload")
-DEFTIMEVAR (TV_THREAD_PROLOGUE_AND_EPILOGUE, "thread prologue and epilogue")
+DEFTIMEVAR (TV_THREAD_PROLOGUE_AND_EPILOGUE, "thread pro- & epilogue")
 DEFTIMEVAR (TV_IFCVT2		     , "if-conversion 2")
 DEFTIMEVAR (TV_PEEPHOLE2             , "peephole 2")
 DEFTIMEVAR (TV_RENAME_REGISTERS      , "rename registers")
Index: df-core.c
===================================================================
--- df-core.c	(revision 115812)
+++ df-core.c	(working copy)
@@ -330,6 +330,7 @@ df_init (int flags)
 
   gcc_assert (!df_current_instance);
   df_current_instance = df;
+  df->postorder = NULL;
   return df;
 }
 
@@ -508,12 +509,17 @@ df_finish1 (struct df *df)
 {
   int i;
 
+  if (!df)
+    return;
+
   for (i = 0; i < df->num_problems_defined; i++)
     {
       struct dataflow *dflow = df->problems_in_order[i];
       dflow->problem->free_fun (df->problems_in_order[i]); 
     }
 
+  if (df->postorder)
+    free (df->postorder);
   free (df);
   df_current_instance = NULL;
 }
@@ -805,19 +811,17 @@ df_analyze_problem (struct dataflow *dfl
 void
 df_analyze (struct df *df)
 {
-  int *postorder = XNEWVEC (int, last_basic_block);
   bitmap current_all_blocks = BITMAP_ALLOC (NULL);
-  int n_blocks;
   int i;
   bool everything;
 
-  n_blocks = post_order_compute (postorder, true);
+  if (df->postorder)
+    free (df->postorder);
+  df->postorder = XNEWVEC (int, last_basic_block);
+  df->n_blocks = post_order_compute (df->postorder, true, true);
 
-  if (n_blocks != n_basic_blocks)
-    delete_unreachable_blocks ();
-
-  for (i = 0; i < n_blocks; i++)
-    bitmap_set_bit (current_all_blocks, postorder[i]);
+  for (i = 0; i < df->n_blocks; i++)
+    bitmap_set_bit (current_all_blocks, df->postorder[i]);
 
   /* No one called df_rescan_blocks, so do it.  */
   if (!df->blocks_to_scan)
@@ -831,7 +835,8 @@ df_analyze (struct df *df)
     {
       everything = false;
       bitmap_and_into (df->blocks_to_analyze, current_all_blocks);
-      n_blocks = df_prune_to_subcfg (postorder, n_blocks, df->blocks_to_analyze);
+      df->n_blocks = df_prune_to_subcfg (df->postorder, 
+					 df->n_blocks, df->blocks_to_analyze);
       BITMAP_FREE (current_all_blocks);
     }
   else
@@ -846,7 +851,7 @@ df_analyze (struct df *df)
     df_analyze_problem (df->problems_in_order[i], 
 			df->blocks_to_analyze, df->blocks_to_analyze, 
 			df->blocks_to_scan,
-			postorder, n_blocks, false);
+			df->postorder, df->n_blocks, false);
 
   if (everything)
     {
@@ -856,7 +861,25 @@ df_analyze (struct df *df)
 
   BITMAP_FREE (df->blocks_to_scan);
   df->blocks_to_scan = NULL;
-  free (postorder);
+}
+
+
+/* Return the number of basic blocks from the last call to df_analyze.  */
+
+int 
+df_get_n_blocks (struct df *df)
+{
+  gcc_assert (df->postorder);
+  return df->n_blocks;
+}
+
+
+/* Return a pointer to the array of basic blocks from the last call to df_analyze.  */
+int *
+df_get_postorder (struct df *df)
+{
+  gcc_assert (df->postorder);
+  return df->postorder;
 }
 
 static struct df_problem user_problem; 
Index: cfganal.c
===================================================================
--- cfganal.c	(revision 115812)
+++ cfganal.c	(working copy)
@@ -645,16 +645,20 @@ connect_infinite_loops_to_exit (void)
   return;
 }
 
-/* Compute reverse top sort order.  
-   This is computing a post order numbering of the graph.  */
+/* Compute reverse top sort order.  This is computing a post order
+   numbering of the graph.  If INCLUDE_ENTRY_EXIT is true, then then
+   ENTRY_BLOCK and EXIT_BLOCK are included.  If DELETE_UNREACHABLE is
+   true, unreachable blocks are deleted.  */
 
 int
-post_order_compute (int *post_order, bool include_entry_exit)
+post_order_compute (int *post_order, bool include_entry_exit, 
+		    bool delete_unreachable)
 {
   edge_iterator *stack;
   int sp;
   int post_order_num = 0;
   sbitmap visited;
+  int count;
 
   if (include_entry_exit)
     post_order[post_order_num++] = EXIT_BLOCK;
@@ -699,7 +703,7 @@ post_order_compute (int *post_order, boo
       else
 	{
 	  if (ei_one_before_end_p (ei) && src != ENTRY_BLOCK_PTR)
-	   post_order[post_order_num++] = src->index;
+	    post_order[post_order_num++] = src->index;
 
 	  if (!ei_one_before_end_p (ei))
 	    ei_next (&stack[sp - 1]);
@@ -709,7 +713,29 @@ post_order_compute (int *post_order, boo
     }
 
   if (include_entry_exit)
-    post_order[post_order_num++] = ENTRY_BLOCK;
+    {
+      post_order[post_order_num++] = ENTRY_BLOCK;
+      count = post_order_num;
+    }
+  else 
+    count = post_order_num + 2;
+  
+  /* Delete the unreachable blocks if some were found and we are
+     supposed to do it.  */
+  if (delete_unreachable && (count != n_basic_blocks))
+    {
+      basic_block b;
+      basic_block next_bb;
+      for (b = ENTRY_BLOCK_PTR->next_bb; b != EXIT_BLOCK_PTR; b = next_bb)
+	{
+	  next_bb = b->next_bb;
+	  
+	  if (!(TEST_BIT (visited, b->index)))
+	    delete_basic_block (b);
+	}
+      
+      tidy_fallthru_edges ();
+    }
 
   free (stack);
   sbitmap_free (visited);
Index: local-alloc.c
===================================================================
--- local-alloc.c	(revision 115812)
+++ local-alloc.c	(working copy)
@@ -2529,8 +2529,6 @@ rest_of_handle_local_alloc (void)
 
       rebuild_jump_labels (get_insns ());
       purge_all_dead_edges ();
-      delete_unreachable_blocks ();
-
       timevar_pop (TV_JUMP);
     }
 
Index: df.h
===================================================================
--- df.h	(revision 115812)
+++ df.h	(working copy)
@@ -400,6 +400,8 @@ struct df
   bitmap eh_block_artificial_uses;
   bitmap entry_block_defs;       /* The set of hardware registers live on entry to the function.  */
   bitmap exit_block_uses;        /* The set of hardware registers used in exit block.  */
+  int *postorder;                /* The current set of basic blocks in postorder.  */
+  int n_blocks;                  /* The number of blocks in postorder.  */
 };
 
 #define DF_SCAN_BB_INFO(DF, BB) (df_scan_get_bb_info((DF)->problems_by_index[DF_SCAN],(BB)->index))
@@ -632,6 +634,8 @@ extern void df_delete_basic_block (struc
 extern void df_finish1 (struct df *);
 extern void df_analyze_problem (struct dataflow *, bitmap, bitmap, bitmap, int *, int, bool);
 extern void df_analyze (struct df *);
+extern int df_get_n_blocks (struct df *);
+extern int *df_get_postorder (struct df *);
 extern void df_simple_iterative_dataflow (enum df_flow_dir, df_init_function,
 					  df_confluence_function_0, df_confluence_function_n,
 					  df_transfer_function, bitmap, int *, int);
Index: sched-rgn.c
===================================================================
--- sched-rgn.c	(revision 115812)
+++ sched-rgn.c	(working copy)
@@ -1030,7 +1030,7 @@ extend_rgns (int *degree, int *idxp, sbi
   max_hdr = xmalloc (last_basic_block * sizeof (*max_hdr));
 
   order = xmalloc (last_basic_block * sizeof (*order));
-  post_order_compute (order, false);
+  post_order_compute (order, false, false);
 
   for (i = nblocks - 1; i >= 0; i--)
     {
Index: basic-block.h
===================================================================
--- basic-block.h	(revision 115812)
+++ basic-block.h	(working copy)
@@ -517,7 +517,7 @@ extern edge redirect_edge_succ_nodup (ed
 extern void redirect_edge_pred (edge, basic_block);
 extern basic_block create_basic_block_structure (rtx, rtx, rtx, basic_block);
 extern void clear_bb_flags (void);
-extern int post_order_compute (int *, bool);
+extern int post_order_compute (int *, bool, bool);
 extern int pre_and_rev_post_order_compute (int *, int *, bool);
 extern int dfs_enumerate_from (basic_block, int,
 			       bool (*)(basic_block, void *),
Index: dce.c
===================================================================
--- dce.c	(revision 115812)
+++ dce.c	(working copy)
@@ -561,7 +561,8 @@ dce_process_block (basic_block bb, bool 
 static void
 fast_dce (bool df_delete)
 {
-  int *postorder = xmalloc (sizeof (int) *last_basic_block);
+  int *postorder = df_get_postorder (dce_df);
+  int n_blocks = df_get_n_blocks (dce_df);
   int i;
   /* The set of blocks that have been seen on this iteration.  */
   bitmap processed = BITMAP_ALLOC (NULL);
@@ -574,15 +575,11 @@ fast_dce (bool df_delete)
   bitmap all_blocks = BITMAP_ALLOC (NULL);
   bool global_changed = true;
   struct dataflow * dflow = dce_df->problems_by_index[DF_LR];
-  int n_blocks;
+
   int loop_count = 0;
 
   prescan_insns_for_dce (true);
 
-  n_blocks = post_order_compute (postorder, true);
-  if (n_blocks != n_basic_blocks)
-    delete_unreachable_blocks ();
-
   for (i = 0; i < n_blocks; i++)
     bitmap_set_bit (all_blocks, postorder[i]);
 
@@ -649,7 +646,6 @@ fast_dce (bool df_delete)
 
   delete_unmarked_insns (df_delete);
 
-  free (postorder);
   BITMAP_FREE (processed);
   BITMAP_FREE (changed);
   BITMAP_FREE (redo_out);
@@ -968,8 +964,8 @@ init_rs_dflow (void)
   out_vec = XNEWVEC (bitmap, last_basic_block);
   gen_vec = XNEWVEC (bitmap, last_basic_block);
   kill_vec = XNEWVEC (bitmap, last_basic_block);
-  postorder = XNEWVEC (int, last_basic_block);
-  n_blocks = post_order_compute (postorder, true);
+  n_blocks = df_get_n_blocks (dce_df);
+  postorder = df_get_postorder (dce_df);
   iterating = BITMAP_ALLOC (NULL);
 
   num_stores = VEC_length (store_info, stores.stores);
@@ -1013,7 +1009,6 @@ end_rs_dflow (void)
   XDELETE (max_in_luid);
 
   BITMAP_FREE (iterating);
-  XDELETE (postorder);
   XDELETE (kill_vec);
   XDELETE (gen_vec);
   XDELETE (out_vec);
@@ -1637,15 +1632,53 @@ gate_dse (void)
   return optimize > 0 && flag_new_dce;
 }
 
-struct tree_opt_pass pass_rtl_dse =
+struct tree_opt_pass pass_rtl_dse1 =
+{
+  "dse1",                               /* name */
+  gate_dse,                             /* gate */
+  rest_of_handle_dse,                   /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_DSE1,                              /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_dump_func |
+  TODO_df_finish |
+  TODO_ggc_collect,                     /* todo_flags_finish */
+  'w'                                   /* letter */
+};
+
+struct tree_opt_pass pass_rtl_dse2 =
+{
+  "dse2",                               /* name */
+  gate_dse,                             /* gate */
+  rest_of_handle_dse,                   /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_DSE2,                              /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_dump_func |
+  TODO_df_finish |
+  TODO_ggc_collect,                     /* todo_flags_finish */
+  'w'                                   /* letter */
+};
+
+struct tree_opt_pass pass_rtl_dse3 =
 {
-  "dse",                                /* name */
+  "dse3",                               /* name */
   gate_dse,                             /* gate */
   rest_of_handle_dse,                   /* execute */
   NULL,                                 /* sub */
   NULL,                                 /* next */
   0,                                    /* static_pass_number */
-  TV_DSE,                               /* tv_id */
+  TV_DSE3,                              /* tv_id */
   0,                                    /* properties_required */
   0,                                    /* properties_provided */
   0,                                    /* properties_destroyed */
Index: sched-ebb.c
===================================================================
--- sched-ebb.c	(revision 115861)
+++ sched-ebb.c	(working copy)
@@ -554,7 +554,7 @@ schedule_ebbs (void)
   /* Taking care of this degenerate case makes the rest of
      this code simpler.  */
   if (n_basic_blocks == NUM_FIXED_BLOCKS)
-    return;
+    return NULL;
 
   /* We need current_sched_info in init_dependency_caches, which is
      invoked via sched_init.  */
Index: haifa-sched.c
===================================================================
--- haifa-sched.c	(revision 115812)
+++ haifa-sched.c	(working copy)
@@ -4271,13 +4271,10 @@ extend_bb (basic_block bb)
    If EBB is EXIT_BLOCK_PTR, then BB is recovery block.
    If EBB is NULL, then BB should be a new region.  */
 void
-add_block (struct df *df, basic_block bb, basic_block ebb)
+add_block (struct df *df ATTRIBUTE_UNUSED, basic_block bb, basic_block ebb)
 {
-  gcc_assert (current_sched_info->flags & DETACH_LIFE_INFO
-	      && DF_LIVE_IN (df, bb) == NULL
-	      && DF_LIVE_OUT (df, bb) == NULL);
-
-  extend_bb (bb);
+  gcc_assert (current_sched_info->flags & DETACH_LIFE_INFO);
+  extend_bb (bb); 
 
   glat_start[bb->index] = 0;
   glat_end[bb->index] = 0;

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]