This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Fix clone streaming


Hi,
this patch fixes streaming of clones so we can stream non-inline clones too.
These differ from inline clones by having a different declaration than the
master, so instead of just streaming a bit saying whether the node is a clone,
we now stream info about which node it is a clone of.

We also need to decide what bodies to output - when outputting a clone, we
always have to arrange for the master to have a body.  This cannot be determined
easily from the node set anymore, because we might have a clone that has an
inline clone.  The inline clone is part of the partition, but the clone and its
master are not (they might be used in a different partition).  For this reason
I added body predicates to the cgraph encoder that are similar to the
initializer feature of the varpool encoder.

Bootstrapped/regtested on x86_64-linux in isolation as well as in combination
with all the changes needed to enable ipa-cp. I also tested ipa-cp on
gamess/gcc/dlv.
OK?

	* cgraph.c (cgraph_clone_node): Take decl argument and insert
	clone into hash when it is different from orig.
	(cgraph_create_virtual_clone): Update use of cgraph_clone_node.
	* cgraph.h (cgraph_clone_node): Update prototype.
	* lto-cgraph.c (lto_cgraph_encoder_new): Create body map.
	(lto_cgraph_encoder_delete): Delete body map.
	(lto_cgraph_encoder_size): Move to header.
	(lto_cgraph_encoder_encode_body_p, lto_set_cgraph_encoder_encode_body): New.
	(lto_output_node): Do not take written_decls argument; output clone_of
	pointer.
	(add_node_to): Add include_body argument; call
	lto_set_cgraph_encoder_encode_body on master of the clone.
	(add_references): Update use of add_node_to.
	(compute_ltrans_boundary): Likewise.
	(output_cgraph): Do not create written_decls bitmap.
	(input_node): Take nodes argument; stream in clone_of correctly.
	(input_cgraph_1): Update use of input_node.
	* lto-streamer-out.c (lto_output): Use encoder info to decide
	what bodies to output.
	* ipa-inline.c (cgraph_clone_inlined_nodes,
	cgraph_decide_recursive_inlining): Update call of cgraph_clone_node.
	* lto-streamer.h (lto_cgraph_encoder_d): Add body.
	(lto_cgraph_encoder_size): Define here.
	(lto_cgraph_encoder_encode_body_p, lto_varpool_encoder_encode_body_p):
	Declare.
Index: cgraph.c
===================================================================
--- cgraph.c	(revision 159454)
+++ cgraph.c	(working copy)
@@ -2051,7 +2053,7 @@ cgraph_clone_edge (struct cgraph_edge *e
    function's profile to reflect the fact that part of execution is handled
    by node.  */
 struct cgraph_node *
-cgraph_clone_node (struct cgraph_node *n, gcov_type count, int freq,
+cgraph_clone_node (struct cgraph_node *n, tree decl, gcov_type count, int freq,
 		   int loop_nest, bool update_original,
 		   VEC(cgraph_edge_p,heap) *redirect_callers)
 {
@@ -2060,7 +2062,7 @@ cgraph_clone_node (struct cgraph_node *n
   gcov_type count_scale;
   unsigned i;
 
-  new_node->decl = n->decl;
+  new_node->decl = decl;
   new_node->origin = n->origin;
   if (new_node->origin)
     {
@@ -2118,6 +2120,24 @@ cgraph_clone_node (struct cgraph_node *n
   new_node->clone_of = n;
 
   cgraph_call_node_duplication_hooks (n, new_node);
+  if (n->decl != decl)
+    {
+      struct cgraph_node **slot;
+      slot = (struct cgraph_node **) htab_find_slot (cgraph_hash, new_node, INSERT);
+      gcc_assert (!*slot);
+      *slot = new_node;
+      if (assembler_name_hash)
+	{
+	  void **aslot;
+	  tree name = DECL_ASSEMBLER_NAME (decl);
+
+	  aslot = htab_find_slot_with_hash (assembler_name_hash, name,
+					    decl_assembler_name_hash (name),
+					    INSERT);
+	  gcc_assert (!*aslot);
+	  *aslot = new_node;
+	}
+    }
   return new_node;
 }
 
@@ -2159,7 +2179,6 @@ cgraph_create_virtual_clone (struct cgra
   tree old_decl = old_node->decl;
   struct cgraph_node *new_node = NULL;
   tree new_decl;
-  struct cgraph_node key, **slot;
   size_t i;
   struct ipa_replace_map *map;
 
@@ -2177,10 +2196,9 @@ cgraph_create_virtual_clone (struct cgra
   SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
   SET_DECL_RTL (new_decl, NULL);
 
-  new_node = cgraph_clone_node (old_node, old_node->count,
+  new_node = cgraph_clone_node (old_node, new_decl, old_node->count,
 				CGRAPH_FREQ_BASE, 0, false,
 				redirect_callers);
-  new_node->decl = new_decl;
   /* Update the properties.
      Make clone visible only within this translation unit.  Make sure
      that is not weak also.
@@ -2243,21 +2261,6 @@ cgraph_create_virtual_clone (struct cgra
   new_node->lowered = true;
   new_node->reachable = true;
 
-  key.decl = new_decl;
-  slot = (struct cgraph_node **) htab_find_slot (cgraph_hash, &key, INSERT);
-  gcc_assert (!*slot);
-  *slot = new_node;
-  if (assembler_name_hash)
-    {
-      void **aslot;
-      tree name = DECL_ASSEMBLER_NAME (new_decl);
-
-      aslot = htab_find_slot_with_hash (assembler_name_hash, name,
-					decl_assembler_name_hash (name),
-					INSERT);
-      gcc_assert (!*aslot);
-      *aslot = new_node;
-    }
 
   return new_node;
 }
Index: cgraph.h
===================================================================
--- cgraph.h	(revision 159454)
+++ cgraph.h	(working copy)
@@ -544,7 +547,7 @@ const char * cgraph_node_name (struct cg
 struct cgraph_edge * cgraph_clone_edge (struct cgraph_edge *,
 					struct cgraph_node *, gimple,
 					unsigned, gcov_type, int, int, bool);
-struct cgraph_node * cgraph_clone_node (struct cgraph_node *, gcov_type, int,
+struct cgraph_node * cgraph_clone_node (struct cgraph_node *, tree, gcov_type, int,
 					int, bool, VEC(cgraph_edge_p,heap) *);
 
 void cgraph_redirect_edge_callee (struct cgraph_edge *, struct cgraph_node *);
Index: lto-cgraph.c
===================================================================
--- lto-cgraph.c	(revision 159454)
+++ lto-cgraph.c	(working copy)
@@ -71,6 +71,7 @@ lto_cgraph_encoder_new (void)
   lto_cgraph_encoder_t encoder = XCNEW (struct lto_cgraph_encoder_d);
   encoder->map = pointer_map_create ();
   encoder->nodes = NULL;
+  encoder->body = pointer_set_create ();
   return encoder;
 }
 
@@ -82,6 +83,7 @@ lto_cgraph_encoder_delete (lto_cgraph_en
 {
    VEC_free (cgraph_node_ptr, heap, encoder->nodes);
    pointer_map_destroy (encoder->map);
+   pointer_set_destroy (encoder->body);
    free (encoder);
 }
 
@@ -137,12 +139,22 @@ lto_cgraph_encoder_deref (lto_cgraph_enc
 }
 
 
-/* Return number of encoded nodes in ENCODER.  */
+/* Return TRUE if we should encode body of NODE (if any).  */
 
-static int
-lto_cgraph_encoder_size (lto_cgraph_encoder_t encoder)
+bool
+lto_cgraph_encoder_encode_body_p (lto_cgraph_encoder_t encoder,
+				  struct cgraph_node *node)
+{
+  return pointer_set_contains (encoder->body, node);
+}
+
+/* Record in ENCODER that the body of NODE (if any) should be encoded.  */
+
+static void
+lto_set_cgraph_encoder_encode_body (lto_cgraph_encoder_t encoder,
+				    struct cgraph_node *node)
 {
-  return VEC_length (cgraph_node_ptr, encoder->nodes);
+  pointer_set_insert (encoder->body, node);
 }
 
 /* Create a new varpool encoder.  */
@@ -394,17 +406,16 @@ reachable_from_this_partition_p (struct 
 static void
 lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
 		 lto_cgraph_encoder_t encoder, cgraph_node_set set,
-		 varpool_node_set vset,
-		 bitmap written_decls)
+		 varpool_node_set vset)
 {
   unsigned int tag;
   struct bitpack_d *bp;
-  bool boundary_p, wrote_decl_p;
+  bool boundary_p;
   intptr_t ref;
   bool in_other_partition = false;
+  struct cgraph_node *clone_of;
 
   boundary_p = !cgraph_node_in_set_p (node, set);
-  wrote_decl_p = bitmap_bit_p (written_decls, DECL_UID (node->decl));
 
   if (node->analyzed && !boundary_p)
     tag = LTO_cgraph_analyzed_node;
@@ -436,10 +447,18 @@ lto_output_node (struct lto_simple_outpu
       in_other_partition = 1;
     }
 
-  lto_output_uleb128_stream (ob->main_stream, wrote_decl_p);
+  clone_of = node->clone_of;
+  while (clone_of
+	 && (ref = lto_cgraph_encoder_lookup (encoder, node->clone_of)) == LCC_NOT_FOUND)
+    if (clone_of->prev_sibling_clone)
+      clone_of = clone_of->prev_sibling_clone;
+    else
+      clone_of = clone_of->clone_of;
+  if (!clone_of)
+    lto_output_sleb128_stream (ob->main_stream, LCC_NOT_FOUND);
+  else
+    lto_output_sleb128_stream (ob->main_stream, ref);
 
-  if (!wrote_decl_p)
-    bitmap_set_bit (written_decls, DECL_UID (node->decl));
 
   lto_output_fn_decl_index (ob->decl_state, ob->main_stream, node->decl);
   lto_output_sleb128_stream (ob->main_stream, node->count);
@@ -636,11 +656,15 @@ output_profile_summary (struct lto_simpl
 
 /* Add NODE into encoder as well as nodes it is cloned from.
    Do it in a way so clones appear first.  */
+
 static void
-add_node_to (lto_cgraph_encoder_t encoder, struct cgraph_node *node)
+add_node_to (lto_cgraph_encoder_t encoder, struct cgraph_node *node,
+	     bool include_body)
 {
   if (node->clone_of)
-    add_node_to (encoder, node->clone_of);
+    add_node_to (encoder, node->clone_of, include_body);
+  else if (include_body)
+    lto_set_cgraph_encoder_encode_body (encoder, node);
   lto_cgraph_encoder_encode (encoder, node);
 }
 
@@ -655,7 +679,7 @@ add_references (lto_cgraph_encoder_t enc
   struct ipa_ref *ref;
   for (i = 0; ipa_ref_list_reference_iterate (list, i, ref); i++)
     if (ref->refered_type == IPA_REF_CGRAPH)
-      add_node_to (encoder, ipa_ref_node (ref));
+      add_node_to (encoder, ipa_ref_node (ref), false);
     else
       {
 	struct varpool_node *vnode = ipa_ref_varpool_node (ref);
@@ -757,7 +781,7 @@ compute_ltrans_boundary (struct lto_out_
   for (csi = csi_start (set); !csi_end_p (csi); csi_next (&csi))
     {
       node = csi_node (csi);
-      add_node_to (encoder, node);
+      add_node_to (encoder, node, true);
       add_references (encoder, varpool_encoder, &node->ref_list);
     }
   for (vsi = vsi_start (vset); !vsi_end_p (vsi); vsi_next (&vsi))
@@ -797,7 +821,7 @@ compute_ltrans_boundary (struct lto_out_
 	    {
 	      /* We should have moved all the inlines.  */
 	      gcc_assert (!callee->global.inlined_to);
-	      add_node_to (encoder, callee);
+	      add_node_to (encoder, callee, false);
 	    }
 	}
     }
@@ -812,7 +836,6 @@ output_cgraph (cgraph_node_set set, varp
   struct lto_simple_output_block *ob;
   cgraph_node_set_iterator csi;
   int i, n_nodes;
-  bitmap written_decls;
   lto_cgraph_encoder_t encoder;
   lto_varpool_encoder_t varpool_encoder;
   struct cgraph_asm_node *can;
@@ -828,11 +851,6 @@ output_cgraph (cgraph_node_set set, varp
   encoder = ob->decl_state->cgraph_node_encoder;
   varpool_encoder = ob->decl_state->varpool_node_encoder;
 
-  /* The FUNCTION_DECLs for which we have written a node.  The first
-     node found is written as the "original" node, the remaining nodes
-     are considered its clones.  */
-  written_decls = lto_bitmap_alloc ();
-
   /* Write out the nodes.  We must first output a node and then its clones,
      otherwise at a time reading back the node there would be nothing to clone
      from.  */
@@ -840,11 +858,9 @@ output_cgraph (cgraph_node_set set, varp
   for (i = 0; i < n_nodes; i++)
     {
       node = lto_cgraph_encoder_deref (encoder, i);
-      lto_output_node (ob, node, encoder, set, vset, written_decls);
+      lto_output_node (ob, node, encoder, set, vset);
     }
 
-  lto_bitmap_free (written_decls);
-
   /* Go over the nodes in SET again to write edges.  */
   for (csi = csi_start (set); !csi_end_p (csi); csi_next (&csi))
     {
@@ -950,30 +967,32 @@ output_varpool (cgraph_node_set set, var
 static struct cgraph_node *
 input_node (struct lto_file_decl_data *file_data,
 	    struct lto_input_block *ib,
-	    enum LTO_cgraph_tags tag)
+	    enum LTO_cgraph_tags tag,
+	    VEC(cgraph_node_ptr, heap) *nodes)
 {
   tree fn_decl;
   struct cgraph_node *node;
   struct bitpack_d *bp;
   int stack_size = 0;
   unsigned decl_index;
-  bool clone_p;
   int ref = LCC_NOT_FOUND, ref2 = LCC_NOT_FOUND;
   int self_time = 0;
   int self_size = 0;
   int time_inlining_benefit = 0;
   int size_inlining_benefit = 0;
   unsigned long same_body_count = 0;
+  int clone_ref;
 
-  clone_p = (lto_input_uleb128 (ib) != 0);
+  clone_ref = lto_input_sleb128 (ib);
 
   decl_index = lto_input_uleb128 (ib);
   fn_decl = lto_file_decl_data_get_fn_decl (file_data, decl_index);
 
-  if (clone_p)
-    node = cgraph_clone_node (cgraph_node (fn_decl), 0,
-			      CGRAPH_FREQ_BASE, 0, false, NULL);
-
+  if (clone_ref != LCC_NOT_FOUND)
+    {
+      node = cgraph_clone_node (VEC_index (cgraph_node_ptr, nodes, clone_ref), fn_decl,
+				0, CGRAPH_FREQ_BASE, 0, false, NULL);
+    }
   else
     node = cgraph_node (fn_decl);
 
@@ -1214,7 +1233,7 @@ input_cgraph_1 (struct lto_file_decl_dat
         input_edge (ib, nodes, true);
       else
 	{
-	  node = input_node (file_data, ib, tag);
+	  node = input_node (file_data, ib, tag,nodes);
 	  if (node == NULL || node->decl == NULL_TREE)
 	    internal_error ("bytecode stream: found empty cgraph node");
 	  VEC_safe_push (cgraph_node_ptr, heap, nodes, node);
Index: lto-streamer-out.c
===================================================================
--- lto-streamer-out.c	(revision 159454)
+++ lto-streamer-out.c	(working copy)
@@ -2090,18 +2090,25 @@ lto_output (cgraph_node_set set, varpool
 {
   struct cgraph_node *node;
   struct lto_out_decl_state *decl_state;
-  cgraph_node_set_iterator csi;
+#ifdef ENABLE_CHECKING
   bitmap output = lto_bitmap_alloc ();
+#endif
+  int i, n_nodes;
+  lto_cgraph_encoder_t encoder = lto_get_out_decl_state ()->cgraph_node_encoder;
 
   lto_writer_init ();
 
+  n_nodes = lto_cgraph_encoder_size (encoder);
   /* Process only the functions with bodies.  */
-  for (csi = csi_start (set); !csi_end_p (csi); csi_next (&csi))
+  for (i = 0; i < n_nodes; i++)
     {
-      node = csi_node (csi);
-      if (node->analyzed && !bitmap_bit_p (output, DECL_UID (node->decl)))
+      node = lto_cgraph_encoder_deref (encoder, i);
+      if (lto_cgraph_encoder_encode_body_p (encoder, node))
 	{
+#ifdef ENABLE_CHECKING
+	  gcc_assert (!bitmap_bit_p (output, DECL_UID (node->decl)));
 	  bitmap_set_bit (output, DECL_UID (node->decl));
+#endif
 	  decl_state = lto_new_out_decl_state ();
 	  lto_push_out_decl_state (decl_state);
 	  if (!flag_wpa)
Index: ipa-inline.c
===================================================================
--- ipa-inline.c	(revision 159454)
+++ ipa-inline.c	(working copy)
@@ -268,7 +268,8 @@ cgraph_clone_inlined_nodes (struct cgrap
       else
 	{
 	  struct cgraph_node *n;
-	  n = cgraph_clone_node (e->callee, e->count, e->frequency, e->loop_nest,
+	  n = cgraph_clone_node (e->callee, e->callee->decl,
+				 e->count, e->frequency, e->loop_nest,
 				 update_original, NULL);
 	  cgraph_redirect_edge_callee (e, n);
 	}
@@ -808,7 +809,8 @@ cgraph_decide_recursive_inlining (struct
 	     cgraph_node_name (node));
 
   /* We need original clone to copy around.  */
-  master_clone = cgraph_clone_node (node, node->count, CGRAPH_FREQ_BASE, 1,
+  master_clone = cgraph_clone_node (node, node->decl,
+				    node->count, CGRAPH_FREQ_BASE, 1,
   				    false, NULL);
   master_clone->needed = true;
   for (e = master_clone->callees; e; e = e->next_callee)
Index: lto-streamer.h
===================================================================
--- lto-streamer.h	(revision 159454)
+++ lto-streamer.h	(working copy)
@@ -467,10 +467,22 @@ struct lto_cgraph_encoder_d
 
   /* Map reference number to node. */
   VEC(cgraph_node_ptr,heap) *nodes;
+
+  /* Map of nodes where we want to output body.  */
+  struct pointer_set_t *body;
 };
 
 typedef struct lto_cgraph_encoder_d *lto_cgraph_encoder_t;
 
+/* Return number of encoded nodes in ENCODER.  */
+
+static inline int
+lto_cgraph_encoder_size (lto_cgraph_encoder_t encoder)
+{
+  return VEC_length (cgraph_node_ptr, encoder->nodes);
+}
+
+
 /* Encoder data structure used to stream callgraph nodes.  */
 struct lto_varpool_encoder_d
 {
@@ -851,6 +863,11 @@ int lto_cgraph_encoder_lookup (lto_cgrap
 lto_cgraph_encoder_t lto_cgraph_encoder_new (void);
 int lto_cgraph_encoder_encode (lto_cgraph_encoder_t, struct cgraph_node *);
 void lto_cgraph_encoder_delete (lto_cgraph_encoder_t);
+bool lto_cgraph_encoder_encode_body_p (lto_cgraph_encoder_t,
+				       struct cgraph_node *);
+
+bool lto_varpool_encoder_encode_body_p (lto_varpool_encoder_t,
+				        struct varpool_node *);
 struct varpool_node *lto_varpool_encoder_deref (lto_varpool_encoder_t, int);
 int lto_varpool_encoder_lookup (lto_varpool_encoder_t, struct varpool_node *);
 lto_varpool_encoder_t lto_varpool_encoder_new (void);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]