This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Whole program optimization and functions-only-called-once.


Hi,
this is a WIP patch to deal with the unreachable clones problem.  It
basically renders the clones as unanalyzed cgraph nodes (but with their
bodies still in place) so IPA passes don't see them.

Honza

Index: cgraph.c
===================================================================
--- cgraph.c	(revision 154127)
+++ cgraph.c	(working copy)
@@ -1132,7 +1132,7 @@ cgraph_release_function_body (struct cgr
       pop_cfun();
       gimple_set_body (node->decl, NULL);
       VEC_free (ipa_opt_pass, heap,
-      		DECL_STRUCT_FUNCTION (node->decl)->ipa_transforms_to_apply);
+      		node->ipa_transforms_to_apply);
       /* Struct function hangs a lot of data that would leak if we didn't
          removed all pointers to it.   */
       ggc_free (DECL_STRUCT_FUNCTION (node->decl));
@@ -1159,6 +1159,8 @@ cgraph_remove_node (struct cgraph_node *
   cgraph_call_node_removal_hooks (node);
   cgraph_node_remove_callers (node);
   cgraph_node_remove_callees (node);
+  VEC_free (ipa_opt_pass, heap,
+            node->ipa_transforms_to_apply);
 
   /* Incremental inlining access removed nodes stored in the postorder list.
      */
Index: cgraph.h
===================================================================
--- cgraph.h	(revision 154127)
+++ cgraph.h	(working copy)
@@ -190,6 +190,11 @@ struct GTY((chain_next ("%h.next"), chai
 
   PTR GTY ((skip)) aux;
 
+  /* Interprocedural passes scheduled to have their transform functions
+     applied next time we execute local pass on them.  We maintain it
+     per-function in order to allow IPA passes to introduce new functions.  */
+  VEC(ipa_opt_pass,heap) * GTY((skip)) ipa_transforms_to_apply;
+
   struct cgraph_local_info local;
   struct cgraph_global_info global;
   struct cgraph_rtl_info rtl;
@@ -206,16 +211,24 @@ struct GTY((chain_next ("%h.next"), chai
      number of cfg nodes with -fprofile-generate and -fprofile-use */
   int pid;
 
-  /* Set when function must be output - it is externally visible
-     or its address is taken.  */
+  /* Set when function must be output for some reason.  The primary
+     use of this flag is to mark functions that need to be output for
+     a non-standard reason.  Functions that are externally visible
+     or reachable from functions that need to be output are marked
+     by specialized flags.  */
   unsigned needed : 1;
-  /* Set when function has address taken.  */
+  /* Set when function has address taken.
+     In the current implementation it implies the needed flag.  */
   unsigned address_taken : 1;
   /* Set when decl is an abstract function pointed to by the
      ABSTRACT_DECL_ORIGIN of a reachable function.  */
   unsigned abstract_and_needed : 1;
   /* Set when function is reachable by call from other function
-     that is either reachable or needed.  */
+     that is either reachable or needed.  
+     This flag is computed at original cgraph construction and then
+     updated in cgraph_remove_unreachable_nodes.  Note that after
+     cgraph_remove_unreachable_nodes cgraph still can contain unreachable
+     nodes when they are needed for virtual clone instantiation.  */
   unsigned reachable : 1;
   /* Set once the function is lowered (i.e. its CFG is built).  */
   unsigned lowered : 1;
Index: cgraphunit.c
===================================================================
--- cgraphunit.c	(revision 154127)
+++ cgraphunit.c	(working copy)
@@ -699,7 +699,7 @@ verify_cgraph_node (struct cgraph_node *
       error_found = true;
     }
 
-  if (node->analyzed && gimple_has_body_p (node->decl)
+  if (node->analyzed && node->reachable && gimple_has_body_p (node->decl)
       && !TREE_ASM_WRITTEN (node->decl)
       && (!DECL_EXTERNAL (node->decl) || node->global.inlined_to)
       && !flag_wpa)
@@ -1777,8 +1777,8 @@ save_inline_function_body (struct cgraph
   TREE_PUBLIC (first_clone->decl) = 0;
   DECL_COMDAT (first_clone->decl) = 0;
   VEC_free (ipa_opt_pass, heap,
-            DECL_STRUCT_FUNCTION (first_clone->decl)->ipa_transforms_to_apply);
-  DECL_STRUCT_FUNCTION (first_clone->decl)->ipa_transforms_to_apply = NULL;
+            first_clone->ipa_transforms_to_apply);
+  first_clone->ipa_transforms_to_apply = NULL;
 
 #ifdef ENABLE_CHECKING
   verify_cgraph_node (first_clone);
@@ -1810,6 +1810,8 @@ cgraph_materialize_clone (struct cgraph_
     node->clone_of->clones = node->next_sibling_clone;
   node->next_sibling_clone = NULL;
   node->prev_sibling_clone = NULL;
+  if (!node->clone_of->analyzed && !node->clone_of->clones)
+    cgraph_remove_node (node->clone_of);
   node->clone_of = NULL;
   bitmap_obstack_release (NULL);
 }
Index: ipa-inline.c
===================================================================
--- ipa-inline.c	(revision 154127)
+++ ipa-inline.c	(working copy)
@@ -1120,7 +1120,7 @@ cgraph_decide_inlining (void)
   max_count = 0;
   max_benefit = 0;
   for (node = cgraph_nodes; node; node = node->next)
-    if (node->analyzed)
+    if (node->reachable)
       {
 	struct cgraph_edge *e;
 
Index: lto-streamer-in.c
===================================================================
--- lto-streamer-in.c	(revision 154127)
+++ lto-streamer-in.c	(working copy)
@@ -1476,6 +1476,7 @@ lto_read_body (struct lto_file_decl_data
       /* Restore decl state */
       file_data->current_decl_state = file_data->global_decl_state;
 
+#if 0
       /* FIXME: ipa_transforms_to_apply holds list of passes that have optimization
          summaries computed and needs to apply changes.  At the moment WHOPR only
          supports inlining, so we can push it here by hand.  In future we need to stream
@@ -1485,6 +1486,7 @@ lto_read_body (struct lto_file_decl_data
 	 VEC_safe_push (ipa_opt_pass, heap,
 			cfun->ipa_transforms_to_apply,
 			(ipa_opt_pass)&pass_ipa_inline);
+#endif
       pop_cfun ();
     }
   else 
Index: c-decl.c
===================================================================
--- c-decl.c	(revision 154127)
+++ c-decl.c	(working copy)
@@ -4497,6 +4497,7 @@ build_compound_literal (location_t loc, 
       set_compound_literal_name (decl);
       DECL_DEFER_OUTPUT (decl) = 1;
       DECL_COMDAT (decl) = 1;
+      TREE_PUBLIC (decl) = 1;
       DECL_ARTIFICIAL (decl) = 1;
       DECL_IGNORED_P (decl) = 1;
       pushdecl (decl);
Index: function.h
===================================================================
--- function.h	(revision 154127)
+++ function.h	(working copy)
@@ -522,11 +522,6 @@ struct GTY(()) function {
   unsigned int curr_properties;
   unsigned int last_verified;
 
-  /* Interprocedural passes scheduled to have their transform functions
-     applied next time we execute local pass on them.  We maintain it
-     per-function in order to allow IPA passes to introduce new functions.  */
-  VEC(ipa_opt_pass,heap) * GTY((skip)) ipa_transforms_to_apply;
-
   /* Non-null if the function does something that would prevent it from
      being copied; this applies to both versioning and inlining.  Set to
      a string describing the reason for failure.  */
Index: ipa.c
===================================================================
--- ipa.c	(revision 154128)
+++ ipa.c	(working copy)
@@ -121,6 +121,7 @@ bool
 cgraph_remove_unreachable_nodes (bool before_inlining_p, FILE *file)
 {
   struct cgraph_node *first = (struct cgraph_node *) (void *) 1;
+  struct cgraph_node *processed = (struct cgraph_node *) (void *) 2;
   struct cgraph_node *node, *next;
   bool changed = false;
 
@@ -142,9 +143,13 @@ cgraph_remove_unreachable_nodes (bool be
         gcc_assert (!node->global.inlined_to);
 	node->aux = first;
 	first = node;
+	node->reachable = true;
       }
     else
-      gcc_assert (!node->aux);
+      {
+        gcc_assert (!node->aux);
+	node->reachable = false;
+      }
 
   /* Perform reachability analysis.  As a special case do not consider
      extern inline functions not inlined as live because we won't output
@@ -154,17 +159,26 @@ cgraph_remove_unreachable_nodes (bool be
       struct cgraph_edge *e;
       node = first;
       first = (struct cgraph_node *) first->aux;
+      node->aux = processed;
 
-      for (e = node->callees; e; e = e->next_callee)
-	if (!e->callee->aux
-	    && node->analyzed
-	    && (!e->inline_failed || !e->callee->analyzed
-		|| (!DECL_EXTERNAL (e->callee->decl))
-                || before_inlining_p))
-	  {
-	    e->callee->aux = first;
-	    first = e->callee;
-	  }
+      if (node->reachable)
+        for (e = node->callees; e; e = e->next_callee)
+	  if (!e->callee->reachable
+	      && node->analyzed
+	      && (!e->inline_failed || !e->callee->analyzed
+		  || (!DECL_EXTERNAL (e->callee->decl))
+                  || before_inlining_p))
+	    {
+	      bool prev_reachable = e->callee->reachable;
+	      e->callee->reachable |= node->reachable;
+	      if (!e->callee->aux
+	          || (e->callee->aux == processed
+		      && prev_reachable != e->callee->reachable))
+	        {
+	          e->callee->aux = first;
+	          first = e->callee;
+	        }
+	    }
       while (node->clone_of && !node->clone_of->aux && !gimple_has_body_p (node->decl))
         {
 	  node = node->clone_of;
@@ -184,13 +198,18 @@ cgraph_remove_unreachable_nodes (bool be
   for (node = cgraph_nodes; node; node = next)
     {
       next = node->next;
+      if (node->aux && !node->reachable)
+        {
+	  cgraph_node_remove_callees (node);
+	  node->analyzed = false;
+	  node->local.inlinable = false;
+	}
       if (!node->aux)
 	{
           node->global.inlined_to = NULL;
 	  if (file)
 	    fprintf (file, " %s", cgraph_node_name (node));
-	  if (!node->analyzed || !DECL_EXTERNAL (node->decl)
-	      || before_inlining_p)
+	  if (!node->analyzed || !DECL_EXTERNAL (node->decl) || before_inlining_p)
 	    cgraph_remove_node (node);
 	  else
 	    {
@@ -204,21 +223,16 @@ cgraph_remove_unreachable_nodes (bool be
 	      /* If so, we need to keep node in the callgraph.  */
 	      if (e || node->needed)
 		{
-		  struct cgraph_node *clone;
-
-		  /* If there are still clones, we must keep body around.
-		     Otherwise we can just remove the body but keep the clone.  */
-		  for (clone = node->clones; clone;
-		       clone = clone->next_sibling_clone)
-		    if (clone->aux)
-		      break;
-		  if (!clone)
-		    {
-		      cgraph_release_function_body (node);
-		      cgraph_node_remove_callees (node);
-		      node->analyzed = false;
-		      node->local.inlinable = false;
-		    }
+		  cgraph_release_function_body (node);
+		  cgraph_node_remove_callees (node);
+		  node->analyzed = false;
+		  node->local.inlinable = false;
+		  if (node->prev_sibling_clone)
+		    node->prev_sibling_clone->next_sibling_clone = node->next_sibling_clone;
+		  else if (node->clone_of)
+		    node->clone_of->clones = node->next_sibling_clone;
+		  if (node->next_sibling_clone)
+		    node->next_sibling_clone->prev_sibling_clone = node->prev_sibling_clone;
 		}
 	      else
 		cgraph_remove_node (node);
@@ -318,7 +332,7 @@ function_and_variable_visibility (bool w
     {
       if (!vnode->finalized)
         continue;
-      gcc_assert ((!DECL_WEAK (vnode->decl) && !DECL_COMMON (vnode->decl))
+      gcc_assert ((!DECL_WEAK (vnode->decl) && !DECL_COMMON (vnode->decl) && !DECL_COMDAT (vnode->decl))
       		  || TREE_PUBLIC (vnode->decl) || DECL_EXTERNAL (vnode->decl));
       if (vnode->needed
 	  && (DECL_COMDAT (vnode->decl) || TREE_PUBLIC (vnode->decl))
Index: tree-inline.c
===================================================================
--- tree-inline.c	(revision 154127)
+++ tree-inline.c	(working copy)
@@ -1983,9 +1983,6 @@ initialize_cfun (tree new_fndecl, tree c
   cfun->function_end_locus = src_cfun->function_end_locus;
   cfun->curr_properties = src_cfun->curr_properties;
   cfun->last_verified = src_cfun->last_verified;
-  if (src_cfun->ipa_transforms_to_apply)
-    cfun->ipa_transforms_to_apply = VEC_copy (ipa_opt_pass, heap,
-					      src_cfun->ipa_transforms_to_apply);
   cfun->va_list_gpr_size = src_cfun->va_list_gpr_size;
   cfun->va_list_fpr_size = src_cfun->va_list_fpr_size;
   cfun->function_frequency = src_cfun->function_frequency;
@@ -3822,6 +3819,10 @@ expand_call_inline (basic_block bb, gimp
   (*debug_hooks->outlining_inline_function) (cg_edge->callee->decl);
 
   /* Update callgraph if needed.  */
+  if (cg_edge->callee->clone_of
+      && !cg_edge->callee->clone_of->next_sibling_clone
+      && !cg_edge->callee->analyzed)
+    cgraph_remove_node (cg_edge->callee);
   cgraph_remove_node (cg_edge->callee);
 
   id->block = NULL_TREE;
@@ -4848,6 +4849,19 @@ tree_function_versioning (tree old_decl,
   id.src_node = old_version_node;
   id.dst_node = new_version_node;
   id.src_cfun = DECL_STRUCT_FUNCTION (old_decl);
+  if (id.src_node->ipa_transforms_to_apply)
+    {
+      VEC(ipa_opt_pass,heap) * old_transforms_to_apply = id.dst_node->ipa_transforms_to_apply;
+      unsigned int i;
+
+      id.dst_node->ipa_transforms_to_apply = VEC_copy (ipa_opt_pass, heap,
+					               id.src_node->ipa_transforms_to_apply);
+      for (i = 0; i < VEC_length (ipa_opt_pass, old_transforms_to_apply); i++)
+        VEC_safe_push (ipa_opt_pass, heap, id.dst_node->ipa_transforms_to_apply,
+		       VEC_index (ipa_opt_pass,
+		       		  old_transforms_to_apply,
+				  i));
+    }
   
   id.copy_decl = copy_decl_no_change;
   id.transform_call_graph_edges
Index: passes.c
===================================================================
--- passes.c	(revision 154127)
+++ passes.c	(working copy)
@@ -1376,15 +1376,6 @@ update_properties_after_pass (void *data
 		           & ~pass->properties_destroyed;
 }
 
-/* Schedule IPA transform pass DATA for CFUN.  */
-
-static void
-add_ipa_transform_pass (void *data)
-{
-  struct ipa_opt_pass_d *ipa_pass = (struct ipa_opt_pass_d *) data;
-  VEC_safe_push (ipa_opt_pass, heap, cfun->ipa_transforms_to_apply, ipa_pass);
-}
-
 /* Execute summary generation for all of the passes in IPA_PASS.  */
 
 void
@@ -1464,19 +1455,22 @@ execute_one_ipa_transform_pass (struct c
 void
 execute_all_ipa_transforms (void)
 {
-  if (cfun && cfun->ipa_transforms_to_apply)
+  struct cgraph_node *node;
+  if (!cfun)
+    return;
+  node = cgraph_node (current_function_decl);
+  if (node->ipa_transforms_to_apply)
     {
       unsigned int i;
-      struct cgraph_node *node = cgraph_node (current_function_decl);
 
-      for (i = 0; i < VEC_length (ipa_opt_pass, cfun->ipa_transforms_to_apply);
+      for (i = 0; i < VEC_length (ipa_opt_pass, node->ipa_transforms_to_apply);
 	   i++)
 	execute_one_ipa_transform_pass (node,
 					VEC_index (ipa_opt_pass,
-						   cfun->ipa_transforms_to_apply,
+						   node->ipa_transforms_to_apply,
 						   i));
-      VEC_free (ipa_opt_pass, heap, cfun->ipa_transforms_to_apply);
-      cfun->ipa_transforms_to_apply = NULL;
+      VEC_free (ipa_opt_pass, heap, node->ipa_transforms_to_apply);
+      node->ipa_transforms_to_apply = NULL;
     }
 }
 
@@ -1551,7 +1545,13 @@ execute_one_pass (struct opt_pass *pass)
   execute_todo (todo_after | pass->todo_flags_finish);
   verify_interpass_invariants ();
   if (pass->type == IPA_PASS)
-    do_per_function (add_ipa_transform_pass, pass);
+    {
+      struct cgraph_node *node;
+      for (node = cgraph_nodes; node; node = node->next)
+        if (node->analyzed)
+          VEC_safe_push (ipa_opt_pass, heap, node->ipa_transforms_to_apply,
+			 (struct ipa_opt_pass_d *)pass);
+    }
 
   if (!current_function_decl)
     cgraph_process_new_functions ();


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]