This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Fix clone streaming


On Sun, 16 May 2010, Jan Hubicka wrote:

> Hi,
> this patch fixes streaming of clones so we can stream non-inline clones too.
> These differ from inline clones by having a different declaration than the master,
> so instead of just streaming a bit saying whether the node is a clone, we now
> stream info about which node it is a clone of.
> 
> We also need to decide what bodies to output - when outputting a clone, we
> always have to arrange for the master to have a body.  This cannot be determined
> easily from the node set anymore, because we might have a clone that has an inline
> clone.  The inline clone is part of the partition, but the clone and its master are
> not (they might be used in a different partition).  For this reason I added body
> predicates to the cgraph encoder that are similar to the initializer feature of the
> varpool encoder.
> 
> Bootstrapped/regtested x86_64-linux in separation as well as with combination
> of all changes needed to enable ipa-cp. I also tested ipa-cp at gamess/gcc/dlv.
> OK?
> 
> 	* cgraph.c (cgraph_clone_node): Take decl argument and insert
> 	clone into hash when it is different from orig.
> 	(cgraph_create_virtual_clone): Update use of cgraph_clone_node.
> 	* cgraph.h (cgraph_clone_node): Update prototype.
> 	* lto-cgraph.c (lto_cgraph_encoder_new): Create body map.
> 	(lto_cgraph_encoder_delete): Delete body map.
> 	(lto_cgraph_encoder_size): Move to header.
> 	(lto_cgraph_encoder_encode_body_p, lto_set_cgraph_encoder_encode_body): New.
> 	(lto_output_node): Do not take written_decls argument; output clone_of
> 	pointer.
> 	(add_node_to): Add include_body argument; call
> 	lto_set_cgraph_encoder_encode_body on master of the clone.
> 	(add_references): Update use of add_node_to.
> 	(compute_ltrans_boundary): Likewise.
> 	(output_cgraph): Do not create written_decls bitmap.
> 	(input_node): Take nodes argument; stream in clone_of correctly.
> 	(input_cgraph_1): Update use of input_node.
> 	* lto-streamer-out.c (lto_output): Use encoder info to decide
> 	what bodies to output.
> 	* ipa-inline.c (cgraph_clone_inlined_nodes,
> 	cgraph_decide_recursive_inlining): Update call of cgraph_clone_node.
> 	* lto-streamer.h (lto_cgraph_encoder_d): Add body.
> 	(lto_cgraph_encoder_size): Define here.
> 	(lto_cgraph_encoder_encode_body_p, lto_varpool_encoder_encode_body_p):
> 	Declare.
> Index: cgraph.c
> ===================================================================
> --- cgraph.c	(revision 159454)
> +++ cgraph.c	(working copy)
> @@ -2051,7 +2053,7 @@ cgraph_clone_edge (struct cgraph_edge *e
>     function's profile to reflect the fact that part of execution is handled
>     by node.  */
>  struct cgraph_node *
> -cgraph_clone_node (struct cgraph_node *n, gcov_type count, int freq,
> +cgraph_clone_node (struct cgraph_node *n, tree decl, gcov_type count, int freq,

Need to update the comment for the new parameter.

Also watch long lines in the ChangeLog entry and the patch.

Ok with that change.
Thanks,
Richard.

>  		   int loop_nest, bool update_original,
>  		   VEC(cgraph_edge_p,heap) *redirect_callers)
>  {
> @@ -2060,7 +2062,7 @@ cgraph_clone_node (struct cgraph_node *n
>    gcov_type count_scale;
>    unsigned i;
>  
> -  new_node->decl = n->decl;
> +  new_node->decl = decl;
>    new_node->origin = n->origin;
>    if (new_node->origin)
>      {
> @@ -2118,6 +2120,24 @@ cgraph_clone_node (struct cgraph_node *n
>    new_node->clone_of = n;
>  
>    cgraph_call_node_duplication_hooks (n, new_node);
> +  if (n->decl != decl)
> +    {
> +      struct cgraph_node **slot;
> +      slot = (struct cgraph_node **) htab_find_slot (cgraph_hash, new_node, INSERT);
> +      gcc_assert (!*slot);
> +      *slot = new_node;
> +      if (assembler_name_hash)
> +	{
> +	  void **aslot;
> +	  tree name = DECL_ASSEMBLER_NAME (decl);
> +
> +	  aslot = htab_find_slot_with_hash (assembler_name_hash, name,
> +					    decl_assembler_name_hash (name),
> +					    INSERT);
> +	  gcc_assert (!*aslot);
> +	  *aslot = new_node;
> +	}
> +    }
>    return new_node;
>  }
>  
> @@ -2159,7 +2179,6 @@ cgraph_create_virtual_clone (struct cgra
>    tree old_decl = old_node->decl;
>    struct cgraph_node *new_node = NULL;
>    tree new_decl;
> -  struct cgraph_node key, **slot;
>    size_t i;
>    struct ipa_replace_map *map;
>  
> @@ -2177,10 +2196,9 @@ cgraph_create_virtual_clone (struct cgra
>    SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
>    SET_DECL_RTL (new_decl, NULL);
>  
> -  new_node = cgraph_clone_node (old_node, old_node->count,
> +  new_node = cgraph_clone_node (old_node, new_decl, old_node->count,
>  				CGRAPH_FREQ_BASE, 0, false,
>  				redirect_callers);
> -  new_node->decl = new_decl;
>    /* Update the properties.
>       Make clone visible only within this translation unit.  Make sure
>       that is not weak also.
> @@ -2243,21 +2261,6 @@ cgraph_create_virtual_clone (struct cgra
>    new_node->lowered = true;
>    new_node->reachable = true;
>  
> -  key.decl = new_decl;
> -  slot = (struct cgraph_node **) htab_find_slot (cgraph_hash, &key, INSERT);
> -  gcc_assert (!*slot);
> -  *slot = new_node;
> -  if (assembler_name_hash)
> -    {
> -      void **aslot;
> -      tree name = DECL_ASSEMBLER_NAME (new_decl);
> -
> -      aslot = htab_find_slot_with_hash (assembler_name_hash, name,
> -					decl_assembler_name_hash (name),
> -					INSERT);
> -      gcc_assert (!*aslot);
> -      *aslot = new_node;
> -    }
>  
>    return new_node;
>  }
> Index: cgraph.h
> ===================================================================
> --- cgraph.h	(revision 159454)
> +++ cgraph.h	(working copy)
> @@ -544,7 +547,7 @@ const char * cgraph_node_name (struct cg
>  struct cgraph_edge * cgraph_clone_edge (struct cgraph_edge *,
>  					struct cgraph_node *, gimple,
>  					unsigned, gcov_type, int, int, bool);
> -struct cgraph_node * cgraph_clone_node (struct cgraph_node *, gcov_type, int,
> +struct cgraph_node * cgraph_clone_node (struct cgraph_node *, tree, gcov_type, int,
>  					int, bool, VEC(cgraph_edge_p,heap) *);
>  
>  void cgraph_redirect_edge_callee (struct cgraph_edge *, struct cgraph_node *);
> Index: lto-cgraph.c
> ===================================================================
> --- lto-cgraph.c	(revision 159454)
> +++ lto-cgraph.c	(working copy)
> @@ -71,6 +71,7 @@ lto_cgraph_encoder_new (void)
>    lto_cgraph_encoder_t encoder = XCNEW (struct lto_cgraph_encoder_d);
>    encoder->map = pointer_map_create ();
>    encoder->nodes = NULL;
> +  encoder->body = pointer_set_create ();
>    return encoder;
>  }
>  
> @@ -82,6 +83,7 @@ lto_cgraph_encoder_delete (lto_cgraph_en
>  {
>     VEC_free (cgraph_node_ptr, heap, encoder->nodes);
>     pointer_map_destroy (encoder->map);
> +   pointer_set_destroy (encoder->body);
>     free (encoder);
>  }
>  
> @@ -137,12 +139,22 @@ lto_cgraph_encoder_deref (lto_cgraph_enc
>  }
>  
>  
> -/* Return number of encoded nodes in ENCODER.  */
> +/* Return TRUE if we should encode body of NODE (if any).  */
>  
> -static int
> -lto_cgraph_encoder_size (lto_cgraph_encoder_t encoder)
> +bool
> +lto_cgraph_encoder_encode_body_p (lto_cgraph_encoder_t encoder,
> +				  struct cgraph_node *node)
> +{
> +  return pointer_set_contains (encoder->body, node);
> +}
> +
> +/* Record in ENCODER that the body of NODE should be encoded.  */
> +
> +static void
> +lto_set_cgraph_encoder_encode_body (lto_cgraph_encoder_t encoder,
> +				    struct cgraph_node *node)
>  {
> -  return VEC_length (cgraph_node_ptr, encoder->nodes);
> +  pointer_set_insert (encoder->body, node);
>  }
>  
>  /* Create a new varpool encoder.  */
> @@ -394,17 +406,16 @@ reachable_from_this_partition_p (struct 
>  static void
>  lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
>  		 lto_cgraph_encoder_t encoder, cgraph_node_set set,
> -		 varpool_node_set vset,
> -		 bitmap written_decls)
> +		 varpool_node_set vset)
>  {
>    unsigned int tag;
>    struct bitpack_d *bp;
> -  bool boundary_p, wrote_decl_p;
> +  bool boundary_p;
>    intptr_t ref;
>    bool in_other_partition = false;
> +  struct cgraph_node *clone_of;
>  
>    boundary_p = !cgraph_node_in_set_p (node, set);
> -  wrote_decl_p = bitmap_bit_p (written_decls, DECL_UID (node->decl));
>  
>    if (node->analyzed && !boundary_p)
>      tag = LTO_cgraph_analyzed_node;
> @@ -436,10 +447,18 @@ lto_output_node (struct lto_simple_outpu
>        in_other_partition = 1;
>      }
>  
> -  lto_output_uleb128_stream (ob->main_stream, wrote_decl_p);
> +  clone_of = node->clone_of;
> +  while (clone_of
> +	 && (ref = lto_cgraph_encoder_lookup (encoder, node->clone_of)) == LCC_NOT_FOUND)
> +    if (clone_of->prev_sibling_clone)
> +      clone_of = clone_of->prev_sibling_clone;
> +    else
> +      clone_of = clone_of->clone_of;
> +  if (!clone_of)
> +    lto_output_sleb128_stream (ob->main_stream, LCC_NOT_FOUND);
> +  else
> +    lto_output_sleb128_stream (ob->main_stream, ref);
>  
> -  if (!wrote_decl_p)
> -    bitmap_set_bit (written_decls, DECL_UID (node->decl));
>  
>    lto_output_fn_decl_index (ob->decl_state, ob->main_stream, node->decl);
>    lto_output_sleb128_stream (ob->main_stream, node->count);
> @@ -636,11 +656,15 @@ output_profile_summary (struct lto_simpl
>  
>  /* Add NODE into encoder as well as nodes it is cloned from.
>     Do it in a way so clones appear first.  */
> +
>  static void
> -add_node_to (lto_cgraph_encoder_t encoder, struct cgraph_node *node)
> +add_node_to (lto_cgraph_encoder_t encoder, struct cgraph_node *node,
> +	     bool include_body)
>  {
>    if (node->clone_of)
> -    add_node_to (encoder, node->clone_of);
> +    add_node_to (encoder, node->clone_of, include_body);
> +  else if (include_body)
> +    lto_set_cgraph_encoder_encode_body (encoder, node);
>    lto_cgraph_encoder_encode (encoder, node);
>  }
>  
> @@ -655,7 +679,7 @@ add_references (lto_cgraph_encoder_t enc
>    struct ipa_ref *ref;
>    for (i = 0; ipa_ref_list_reference_iterate (list, i, ref); i++)
>      if (ref->refered_type == IPA_REF_CGRAPH)
> -      add_node_to (encoder, ipa_ref_node (ref));
> +      add_node_to (encoder, ipa_ref_node (ref), false);
>      else
>        {
>  	struct varpool_node *vnode = ipa_ref_varpool_node (ref);
> @@ -757,7 +781,7 @@ compute_ltrans_boundary (struct lto_out_
>    for (csi = csi_start (set); !csi_end_p (csi); csi_next (&csi))
>      {
>        node = csi_node (csi);
> -      add_node_to (encoder, node);
> +      add_node_to (encoder, node, true);
>        add_references (encoder, varpool_encoder, &node->ref_list);
>      }
>    for (vsi = vsi_start (vset); !vsi_end_p (vsi); vsi_next (&vsi))
> @@ -797,7 +821,7 @@ compute_ltrans_boundary (struct lto_out_
>  	    {
>  	      /* We should have moved all the inlines.  */
>  	      gcc_assert (!callee->global.inlined_to);
> -	      add_node_to (encoder, callee);
> +	      add_node_to (encoder, callee, false);
>  	    }
>  	}
>      }
> @@ -812,7 +836,6 @@ output_cgraph (cgraph_node_set set, varp
>    struct lto_simple_output_block *ob;
>    cgraph_node_set_iterator csi;
>    int i, n_nodes;
> -  bitmap written_decls;
>    lto_cgraph_encoder_t encoder;
>    lto_varpool_encoder_t varpool_encoder;
>    struct cgraph_asm_node *can;
> @@ -828,11 +851,6 @@ output_cgraph (cgraph_node_set set, varp
>    encoder = ob->decl_state->cgraph_node_encoder;
>    varpool_encoder = ob->decl_state->varpool_node_encoder;
>  
> -  /* The FUNCTION_DECLs for which we have written a node.  The first
> -     node found is written as the "original" node, the remaining nodes
> -     are considered its clones.  */
> -  written_decls = lto_bitmap_alloc ();
> -
>    /* Write out the nodes.  We must first output a node and then its clones,
>       otherwise at a time reading back the node there would be nothing to clone
>       from.  */
> @@ -840,11 +858,9 @@ output_cgraph (cgraph_node_set set, varp
>    for (i = 0; i < n_nodes; i++)
>      {
>        node = lto_cgraph_encoder_deref (encoder, i);
> -      lto_output_node (ob, node, encoder, set, vset, written_decls);
> +      lto_output_node (ob, node, encoder, set, vset);
>      }
>  
> -  lto_bitmap_free (written_decls);
> -
>    /* Go over the nodes in SET again to write edges.  */
>    for (csi = csi_start (set); !csi_end_p (csi); csi_next (&csi))
>      {
> @@ -950,30 +967,32 @@ output_varpool (cgraph_node_set set, var
>  static struct cgraph_node *
>  input_node (struct lto_file_decl_data *file_data,
>  	    struct lto_input_block *ib,
> -	    enum LTO_cgraph_tags tag)
> +	    enum LTO_cgraph_tags tag,
> +	    VEC(cgraph_node_ptr, heap) *nodes)
>  {
>    tree fn_decl;
>    struct cgraph_node *node;
>    struct bitpack_d *bp;
>    int stack_size = 0;
>    unsigned decl_index;
> -  bool clone_p;
>    int ref = LCC_NOT_FOUND, ref2 = LCC_NOT_FOUND;
>    int self_time = 0;
>    int self_size = 0;
>    int time_inlining_benefit = 0;
>    int size_inlining_benefit = 0;
>    unsigned long same_body_count = 0;
> +  int clone_ref;
>  
> -  clone_p = (lto_input_uleb128 (ib) != 0);
> +  clone_ref = lto_input_sleb128 (ib);
>  
>    decl_index = lto_input_uleb128 (ib);
>    fn_decl = lto_file_decl_data_get_fn_decl (file_data, decl_index);
>  
> -  if (clone_p)
> -    node = cgraph_clone_node (cgraph_node (fn_decl), 0,
> -			      CGRAPH_FREQ_BASE, 0, false, NULL);
> -
> +  if (clone_ref != LCC_NOT_FOUND)
> +    {
> +      node = cgraph_clone_node (VEC_index (cgraph_node_ptr, nodes, clone_ref), fn_decl,
> +				0, CGRAPH_FREQ_BASE, 0, false, NULL);
> +    }
>    else
>      node = cgraph_node (fn_decl);
>  
> @@ -1214,7 +1233,7 @@ input_cgraph_1 (struct lto_file_decl_dat
>          input_edge (ib, nodes, true);
>        else
>  	{
> -	  node = input_node (file_data, ib, tag);
> +	  node = input_node (file_data, ib, tag,nodes);
>  	  if (node == NULL || node->decl == NULL_TREE)
>  	    internal_error ("bytecode stream: found empty cgraph node");
>  	  VEC_safe_push (cgraph_node_ptr, heap, nodes, node);
> Index: lto-streamer-out.c
> ===================================================================
> --- lto-streamer-out.c	(revision 159454)
> +++ lto-streamer-out.c	(working copy)
> @@ -2090,18 +2090,25 @@ lto_output (cgraph_node_set set, varpool
>  {
>    struct cgraph_node *node;
>    struct lto_out_decl_state *decl_state;
> -  cgraph_node_set_iterator csi;
> +#ifdef ENABLE_CHECKING
>    bitmap output = lto_bitmap_alloc ();
> +#endif
> +  int i, n_nodes;
> +  lto_cgraph_encoder_t encoder = lto_get_out_decl_state ()->cgraph_node_encoder;
>  
>    lto_writer_init ();
>  
> +  n_nodes = lto_cgraph_encoder_size (encoder);
>    /* Process only the functions with bodies.  */
> -  for (csi = csi_start (set); !csi_end_p (csi); csi_next (&csi))
> +  for (i = 0; i < n_nodes; i++)
>      {
> -      node = csi_node (csi);
> -      if (node->analyzed && !bitmap_bit_p (output, DECL_UID (node->decl)))
> +      node = lto_cgraph_encoder_deref (encoder, i);
> +      if (lto_cgraph_encoder_encode_body_p (encoder, node))
>  	{
> +#ifdef ENABLE_CHECKING
> +	  gcc_assert (!bitmap_bit_p (output, DECL_UID (node->decl)));
>  	  bitmap_set_bit (output, DECL_UID (node->decl));
> +#endif
>  	  decl_state = lto_new_out_decl_state ();
>  	  lto_push_out_decl_state (decl_state);
>  	  if (!flag_wpa)
> Index: ipa-inline.c
> ===================================================================
> --- ipa-inline.c	(revision 159454)
> +++ ipa-inline.c	(working copy)
> @@ -268,7 +268,8 @@ cgraph_clone_inlined_nodes (struct cgrap
>        else
>  	{
>  	  struct cgraph_node *n;
> -	  n = cgraph_clone_node (e->callee, e->count, e->frequency, e->loop_nest,
> +	  n = cgraph_clone_node (e->callee, e->callee->decl,
> +				 e->count, e->frequency, e->loop_nest,
>  				 update_original, NULL);
>  	  cgraph_redirect_edge_callee (e, n);
>  	}
> @@ -808,7 +809,8 @@ cgraph_decide_recursive_inlining (struct
>  	     cgraph_node_name (node));
>  
>    /* We need original clone to copy around.  */
> -  master_clone = cgraph_clone_node (node, node->count, CGRAPH_FREQ_BASE, 1,
> +  master_clone = cgraph_clone_node (node, node->decl,
> +				    node->count, CGRAPH_FREQ_BASE, 1,
>    				    false, NULL);
>    master_clone->needed = true;
>    for (e = master_clone->callees; e; e = e->next_callee)
> Index: lto-streamer.h
> ===================================================================
> --- lto-streamer.h	(revision 159454)
> +++ lto-streamer.h	(working copy)
> @@ -467,10 +467,22 @@ struct lto_cgraph_encoder_d
>  
>    /* Map reference number to node. */
>    VEC(cgraph_node_ptr,heap) *nodes;
> +
> +  /* Map of nodes where we want to output body.  */
> +  struct pointer_set_t *body;
>  };
>  
>  typedef struct lto_cgraph_encoder_d *lto_cgraph_encoder_t;
>  
> +/* Return number of encoded nodes in ENCODER.  */
> +
> +static inline int
> +lto_cgraph_encoder_size (lto_cgraph_encoder_t encoder)
> +{
> +  return VEC_length (cgraph_node_ptr, encoder->nodes);
> +}
> +
> +
>  /* Encoder data structure used to stream callgraph nodes.  */
>  struct lto_varpool_encoder_d
>  {
> @@ -851,6 +863,11 @@ int lto_cgraph_encoder_lookup (lto_cgrap
>  lto_cgraph_encoder_t lto_cgraph_encoder_new (void);
>  int lto_cgraph_encoder_encode (lto_cgraph_encoder_t, struct cgraph_node *);
>  void lto_cgraph_encoder_delete (lto_cgraph_encoder_t);
> +bool lto_cgraph_encoder_encode_body_p (lto_cgraph_encoder_t,
> +				       struct cgraph_node *);
> +
> +bool lto_varpool_encoder_encode_body_p (lto_varpool_encoder_t,
> +				        struct varpool_node *);
>  struct varpool_node *lto_varpool_encoder_deref (lto_varpool_encoder_t, int);
>  int lto_varpool_encoder_lookup (lto_varpool_encoder_t, struct varpool_node *);
>  lto_varpool_encoder_t lto_varpool_encoder_new (void);
> 
> 

-- 
Richard Guenther <rguenther@suse.de>
Novell / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 - GF: Markus Rex


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]