This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Fix clone streaming
- From: Richard Guenther <rguenther at suse dot de>
- To: Jan Hubicka <hubicka at ucw dot cz>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Sun, 16 May 2010 20:13:16 +0200 (CEST)
- Subject: Re: Fix clone streaming
- References: <20100516180623.GI16915@kam.mff.cuni.cz>
On Sun, 16 May 2010, Jan Hubicka wrote:
> Hi,
> this patch fixes streaming of clones so we can stream non-inline clones too.
> These differ from inline clones by having a different declaration than the
> master, so instead of just streaming a bit saying whether the node is a clone,
> we now stream which node it is a clone of.
>
> We also need to decide what bodies to output - when outputting a clone, we
> always have to arrange for the master to have a body. This cannot be
> determined easily from the node set anymore, because we might have a clone
> that has an inline clone. The inline clone is part of the partition, but the
> clone and its master are not (they might be used in a different partition).
> For this reason I added body predicates to the cgraph encoder that are similar
> to the initializer feature of the varpool encoder.
>
> Bootstrapped/regtested x86_64-linux separately as well as in combination
> with all changes needed to enable ipa-cp. I also tested ipa-cp on gamess/gcc/dlv.
> OK?
>
> * cgraph.c (cgraph_clone_node): Take decl argument and insert
> clone into hash when it is different from orig.
> (cgraph_create_virtual_clone): Update use of cgraph_clone_node.
> * cgraph.h (cgraph_clone_node): Update prototype.
> * lto-cgraph.c (lto_cgraph_encoder_new): Create body map.
> (lto_cgraph_encoder_delete): Delete body map.
> (lto_cgraph_encoder_size): Move to header.
> (lto_cgraph_encoder_encode_body_p, lto_set_cgraph_encoder_encode_body): New.
> (lto_output_node): Do not take written_decls argument; output clone_of
> pointer.
> (add_node_to): Add include_body argument; call
> lto_set_cgraph_encoder_encode_body on master of the clone.
> (add_references): Update use of add_node_to.
> (compute_ltrans_boundary): Likewise.
> (output_cgraph): Do not create written_decls bitmap.
> (input_node): Take nodes argument; stream in clone_of correctly.
> (input_cgraph_1): Update use of input_node.
> * lto-streamer-out.c (lto_output): Use encoder info to decide
> what bodies to output.
> * ipa-inline.c (cgraph_clone_inlined_nodes,
> cgraph_decide_recursive_inlining): Update call of cgraph_clone_node.
> * lto-streamer.h (lto_cgraph_encoder_d): Add body.
> (lto_cgraph_encoder_size): Define here.
> (lto_cgraph_encoder_encode_body_p, lto_varpool_encoder_encode_body_p):
> Declare.
> Index: cgraph.c
> ===================================================================
> --- cgraph.c (revision 159454)
> +++ cgraph.c (working copy)
> @@ -2051,7 +2053,7 @@ cgraph_clone_edge (struct cgraph_edge *e
> function's profile to reflect the fact that part of execution is handled
> by node. */
> struct cgraph_node *
> -cgraph_clone_node (struct cgraph_node *n, gcov_type count, int freq,
> +cgraph_clone_node (struct cgraph_node *n, tree decl, gcov_type count, int freq,
Need to update the comment for the new parameter.
Also watch long lines in the ChangeLog entry and the patch.
Ok with that change.
Thanks,
Richard.
> int loop_nest, bool update_original,
> VEC(cgraph_edge_p,heap) *redirect_callers)
> {
> @@ -2060,7 +2062,7 @@ cgraph_clone_node (struct cgraph_node *n
> gcov_type count_scale;
> unsigned i;
>
> - new_node->decl = n->decl;
> + new_node->decl = decl;
> new_node->origin = n->origin;
> if (new_node->origin)
> {
> @@ -2118,6 +2120,24 @@ cgraph_clone_node (struct cgraph_node *n
> new_node->clone_of = n;
>
> cgraph_call_node_duplication_hooks (n, new_node);
> + if (n->decl != decl)
> + {
> + struct cgraph_node **slot;
> + slot = (struct cgraph_node **) htab_find_slot (cgraph_hash, new_node, INSERT);
> + gcc_assert (!*slot);
> + *slot = new_node;
> + if (assembler_name_hash)
> + {
> + void **aslot;
> + tree name = DECL_ASSEMBLER_NAME (decl);
> +
> + aslot = htab_find_slot_with_hash (assembler_name_hash, name,
> + decl_assembler_name_hash (name),
> + INSERT);
> + gcc_assert (!*aslot);
> + *aslot = new_node;
> + }
> + }
> return new_node;
> }
>
> @@ -2159,7 +2179,6 @@ cgraph_create_virtual_clone (struct cgra
> tree old_decl = old_node->decl;
> struct cgraph_node *new_node = NULL;
> tree new_decl;
> - struct cgraph_node key, **slot;
> size_t i;
> struct ipa_replace_map *map;
>
> @@ -2177,10 +2196,9 @@ cgraph_create_virtual_clone (struct cgra
> SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
> SET_DECL_RTL (new_decl, NULL);
>
> - new_node = cgraph_clone_node (old_node, old_node->count,
> + new_node = cgraph_clone_node (old_node, new_decl, old_node->count,
> CGRAPH_FREQ_BASE, 0, false,
> redirect_callers);
> - new_node->decl = new_decl;
> /* Update the properties.
> Make clone visible only within this translation unit. Make sure
> that is not weak also.
> @@ -2243,21 +2261,6 @@ cgraph_create_virtual_clone (struct cgra
> new_node->lowered = true;
> new_node->reachable = true;
>
> - key.decl = new_decl;
> - slot = (struct cgraph_node **) htab_find_slot (cgraph_hash, &key, INSERT);
> - gcc_assert (!*slot);
> - *slot = new_node;
> - if (assembler_name_hash)
> - {
> - void **aslot;
> - tree name = DECL_ASSEMBLER_NAME (new_decl);
> -
> - aslot = htab_find_slot_with_hash (assembler_name_hash, name,
> - decl_assembler_name_hash (name),
> - INSERT);
> - gcc_assert (!*aslot);
> - *aslot = new_node;
> - }
>
> return new_node;
> }
> Index: cgraph.h
> ===================================================================
> --- cgraph.h (revision 159454)
> +++ cgraph.h (working copy)
> @@ -544,7 +547,7 @@ const char * cgraph_node_name (struct cg
> struct cgraph_edge * cgraph_clone_edge (struct cgraph_edge *,
> struct cgraph_node *, gimple,
> unsigned, gcov_type, int, int, bool);
> -struct cgraph_node * cgraph_clone_node (struct cgraph_node *, gcov_type, int,
> +struct cgraph_node * cgraph_clone_node (struct cgraph_node *, tree, gcov_type, int,
> int, bool, VEC(cgraph_edge_p,heap) *);
>
> void cgraph_redirect_edge_callee (struct cgraph_edge *, struct cgraph_node *);
> Index: lto-cgraph.c
> ===================================================================
> --- lto-cgraph.c (revision 159454)
> +++ lto-cgraph.c (working copy)
> @@ -71,6 +71,7 @@ lto_cgraph_encoder_new (void)
> lto_cgraph_encoder_t encoder = XCNEW (struct lto_cgraph_encoder_d);
> encoder->map = pointer_map_create ();
> encoder->nodes = NULL;
> + encoder->body = pointer_set_create ();
> return encoder;
> }
>
> @@ -82,6 +83,7 @@ lto_cgraph_encoder_delete (lto_cgraph_en
> {
> VEC_free (cgraph_node_ptr, heap, encoder->nodes);
> pointer_map_destroy (encoder->map);
> + pointer_set_destroy (encoder->body);
> free (encoder);
> }
>
> @@ -137,12 +139,22 @@ lto_cgraph_encoder_deref (lto_cgraph_enc
> }
>
>
> -/* Return number of encoded nodes in ENCODER. */
> +/* Return TRUE if we should encode body of NODE (if any). */
>
> -static int
> -lto_cgraph_encoder_size (lto_cgraph_encoder_t encoder)
> +bool
> +lto_cgraph_encoder_encode_body_p (lto_cgraph_encoder_t encoder,
> + struct cgraph_node *node)
> +{
> + return pointer_set_contains (encoder->body, node);
> +}
> +
> +/* Record in ENCODER that the body of NODE should be encoded. */
> +
> +static void
> +lto_set_cgraph_encoder_encode_body (lto_cgraph_encoder_t encoder,
> + struct cgraph_node *node)
> {
> - return VEC_length (cgraph_node_ptr, encoder->nodes);
> + pointer_set_insert (encoder->body, node);
> }
>
> /* Create a new varpool encoder. */
> @@ -394,17 +406,16 @@ reachable_from_this_partition_p (struct
> static void
> lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
> lto_cgraph_encoder_t encoder, cgraph_node_set set,
> - varpool_node_set vset,
> - bitmap written_decls)
> + varpool_node_set vset)
> {
> unsigned int tag;
> struct bitpack_d *bp;
> - bool boundary_p, wrote_decl_p;
> + bool boundary_p;
> intptr_t ref;
> bool in_other_partition = false;
> + struct cgraph_node *clone_of;
>
> boundary_p = !cgraph_node_in_set_p (node, set);
> - wrote_decl_p = bitmap_bit_p (written_decls, DECL_UID (node->decl));
>
> if (node->analyzed && !boundary_p)
> tag = LTO_cgraph_analyzed_node;
> @@ -436,10 +447,18 @@ lto_output_node (struct lto_simple_outpu
> in_other_partition = 1;
> }
>
> - lto_output_uleb128_stream (ob->main_stream, wrote_decl_p);
> + clone_of = node->clone_of;
> + while (clone_of
> + && (ref = lto_cgraph_encoder_lookup (encoder, node->clone_of)) == LCC_NOT_FOUND)
> + if (clone_of->prev_sibling_clone)
> + clone_of = clone_of->prev_sibling_clone;
> + else
> + clone_of = clone_of->clone_of;
> + if (!clone_of)
> + lto_output_sleb128_stream (ob->main_stream, LCC_NOT_FOUND);
> + else
> + lto_output_sleb128_stream (ob->main_stream, ref);
>
> - if (!wrote_decl_p)
> - bitmap_set_bit (written_decls, DECL_UID (node->decl));
>
> lto_output_fn_decl_index (ob->decl_state, ob->main_stream, node->decl);
> lto_output_sleb128_stream (ob->main_stream, node->count);
> @@ -636,11 +656,15 @@ output_profile_summary (struct lto_simpl
>
> /* Add NODE into encoder as well as nodes it is cloned from.
> Do it in a way so clones appear first. */
> +
> static void
> -add_node_to (lto_cgraph_encoder_t encoder, struct cgraph_node *node)
> +add_node_to (lto_cgraph_encoder_t encoder, struct cgraph_node *node,
> + bool include_body)
> {
> if (node->clone_of)
> - add_node_to (encoder, node->clone_of);
> + add_node_to (encoder, node->clone_of, include_body);
> + else if (include_body)
> + lto_set_cgraph_encoder_encode_body (encoder, node);
> lto_cgraph_encoder_encode (encoder, node);
> }
>
> @@ -655,7 +679,7 @@ add_references (lto_cgraph_encoder_t enc
> struct ipa_ref *ref;
> for (i = 0; ipa_ref_list_reference_iterate (list, i, ref); i++)
> if (ref->refered_type == IPA_REF_CGRAPH)
> - add_node_to (encoder, ipa_ref_node (ref));
> + add_node_to (encoder, ipa_ref_node (ref), false);
> else
> {
> struct varpool_node *vnode = ipa_ref_varpool_node (ref);
> @@ -757,7 +781,7 @@ compute_ltrans_boundary (struct lto_out_
> for (csi = csi_start (set); !csi_end_p (csi); csi_next (&csi))
> {
> node = csi_node (csi);
> - add_node_to (encoder, node);
> + add_node_to (encoder, node, true);
> add_references (encoder, varpool_encoder, &node->ref_list);
> }
> for (vsi = vsi_start (vset); !vsi_end_p (vsi); vsi_next (&vsi))
> @@ -797,7 +821,7 @@ compute_ltrans_boundary (struct lto_out_
> {
> /* We should have moved all the inlines. */
> gcc_assert (!callee->global.inlined_to);
> - add_node_to (encoder, callee);
> + add_node_to (encoder, callee, false);
> }
> }
> }
> @@ -812,7 +836,6 @@ output_cgraph (cgraph_node_set set, varp
> struct lto_simple_output_block *ob;
> cgraph_node_set_iterator csi;
> int i, n_nodes;
> - bitmap written_decls;
> lto_cgraph_encoder_t encoder;
> lto_varpool_encoder_t varpool_encoder;
> struct cgraph_asm_node *can;
> @@ -828,11 +851,6 @@ output_cgraph (cgraph_node_set set, varp
> encoder = ob->decl_state->cgraph_node_encoder;
> varpool_encoder = ob->decl_state->varpool_node_encoder;
>
> - /* The FUNCTION_DECLs for which we have written a node. The first
> - node found is written as the "original" node, the remaining nodes
> - are considered its clones. */
> - written_decls = lto_bitmap_alloc ();
> -
> /* Write out the nodes. We must first output a node and then its clones,
> otherwise at a time reading back the node there would be nothing to clone
> from. */
> @@ -840,11 +858,9 @@ output_cgraph (cgraph_node_set set, varp
> for (i = 0; i < n_nodes; i++)
> {
> node = lto_cgraph_encoder_deref (encoder, i);
> - lto_output_node (ob, node, encoder, set, vset, written_decls);
> + lto_output_node (ob, node, encoder, set, vset);
> }
>
> - lto_bitmap_free (written_decls);
> -
> /* Go over the nodes in SET again to write edges. */
> for (csi = csi_start (set); !csi_end_p (csi); csi_next (&csi))
> {
> @@ -950,30 +967,32 @@ output_varpool (cgraph_node_set set, var
> static struct cgraph_node *
> input_node (struct lto_file_decl_data *file_data,
> struct lto_input_block *ib,
> - enum LTO_cgraph_tags tag)
> + enum LTO_cgraph_tags tag,
> + VEC(cgraph_node_ptr, heap) *nodes)
> {
> tree fn_decl;
> struct cgraph_node *node;
> struct bitpack_d *bp;
> int stack_size = 0;
> unsigned decl_index;
> - bool clone_p;
> int ref = LCC_NOT_FOUND, ref2 = LCC_NOT_FOUND;
> int self_time = 0;
> int self_size = 0;
> int time_inlining_benefit = 0;
> int size_inlining_benefit = 0;
> unsigned long same_body_count = 0;
> + int clone_ref;
>
> - clone_p = (lto_input_uleb128 (ib) != 0);
> + clone_ref = lto_input_sleb128 (ib);
>
> decl_index = lto_input_uleb128 (ib);
> fn_decl = lto_file_decl_data_get_fn_decl (file_data, decl_index);
>
> - if (clone_p)
> - node = cgraph_clone_node (cgraph_node (fn_decl), 0,
> - CGRAPH_FREQ_BASE, 0, false, NULL);
> -
> + if (clone_ref != LCC_NOT_FOUND)
> + {
> + node = cgraph_clone_node (VEC_index (cgraph_node_ptr, nodes, clone_ref), fn_decl,
> + 0, CGRAPH_FREQ_BASE, 0, false, NULL);
> + }
> else
> node = cgraph_node (fn_decl);
>
> @@ -1214,7 +1233,7 @@ input_cgraph_1 (struct lto_file_decl_dat
> input_edge (ib, nodes, true);
> else
> {
> - node = input_node (file_data, ib, tag);
> + node = input_node (file_data, ib, tag,nodes);
> if (node == NULL || node->decl == NULL_TREE)
> internal_error ("bytecode stream: found empty cgraph node");
> VEC_safe_push (cgraph_node_ptr, heap, nodes, node);
> Index: lto-streamer-out.c
> ===================================================================
> --- lto-streamer-out.c (revision 159454)
> +++ lto-streamer-out.c (working copy)
> @@ -2090,18 +2090,25 @@ lto_output (cgraph_node_set set, varpool
> {
> struct cgraph_node *node;
> struct lto_out_decl_state *decl_state;
> - cgraph_node_set_iterator csi;
> +#ifdef ENABLE_CHECKING
> bitmap output = lto_bitmap_alloc ();
> +#endif
> + int i, n_nodes;
> + lto_cgraph_encoder_t encoder = lto_get_out_decl_state ()->cgraph_node_encoder;
>
> lto_writer_init ();
>
> + n_nodes = lto_cgraph_encoder_size (encoder);
> /* Process only the functions with bodies. */
> - for (csi = csi_start (set); !csi_end_p (csi); csi_next (&csi))
> + for (i = 0; i < n_nodes; i++)
> {
> - node = csi_node (csi);
> - if (node->analyzed && !bitmap_bit_p (output, DECL_UID (node->decl)))
> + node = lto_cgraph_encoder_deref (encoder, i);
> + if (lto_cgraph_encoder_encode_body_p (encoder, node))
> {
> +#ifdef ENABLE_CHECKING
> + gcc_assert (!bitmap_bit_p (output, DECL_UID (node->decl)));
> bitmap_set_bit (output, DECL_UID (node->decl));
> +#endif
> decl_state = lto_new_out_decl_state ();
> lto_push_out_decl_state (decl_state);
> if (!flag_wpa)
> Index: ipa-inline.c
> ===================================================================
> --- ipa-inline.c (revision 159454)
> +++ ipa-inline.c (working copy)
> @@ -268,7 +268,8 @@ cgraph_clone_inlined_nodes (struct cgrap
> else
> {
> struct cgraph_node *n;
> - n = cgraph_clone_node (e->callee, e->count, e->frequency, e->loop_nest,
> + n = cgraph_clone_node (e->callee, e->callee->decl,
> + e->count, e->frequency, e->loop_nest,
> update_original, NULL);
> cgraph_redirect_edge_callee (e, n);
> }
> @@ -808,7 +809,8 @@ cgraph_decide_recursive_inlining (struct
> cgraph_node_name (node));
>
> /* We need original clone to copy around. */
> - master_clone = cgraph_clone_node (node, node->count, CGRAPH_FREQ_BASE, 1,
> + master_clone = cgraph_clone_node (node, node->decl,
> + node->count, CGRAPH_FREQ_BASE, 1,
> false, NULL);
> master_clone->needed = true;
> for (e = master_clone->callees; e; e = e->next_callee)
> Index: lto-streamer.h
> ===================================================================
> --- lto-streamer.h (revision 159454)
> +++ lto-streamer.h (working copy)
> @@ -467,10 +467,22 @@ struct lto_cgraph_encoder_d
>
> /* Map reference number to node. */
> VEC(cgraph_node_ptr,heap) *nodes;
> +
> + /* Set of nodes where we want to output body. */
> + struct pointer_set_t *body;
> };
>
> typedef struct lto_cgraph_encoder_d *lto_cgraph_encoder_t;
>
> +/* Return number of encoded nodes in ENCODER. */
> +
> +static inline int
> +lto_cgraph_encoder_size (lto_cgraph_encoder_t encoder)
> +{
> + return VEC_length (cgraph_node_ptr, encoder->nodes);
> +}
> +
> +
> /* Encoder data structure used to stream callgraph nodes. */
> struct lto_varpool_encoder_d
> {
> @@ -851,6 +863,11 @@ int lto_cgraph_encoder_lookup (lto_cgrap
> lto_cgraph_encoder_t lto_cgraph_encoder_new (void);
> int lto_cgraph_encoder_encode (lto_cgraph_encoder_t, struct cgraph_node *);
> void lto_cgraph_encoder_delete (lto_cgraph_encoder_t);
> +bool lto_cgraph_encoder_encode_body_p (lto_cgraph_encoder_t,
> + struct cgraph_node *);
> +
> +bool lto_varpool_encoder_encode_body_p (lto_varpool_encoder_t,
> + struct varpool_node *);
> struct varpool_node *lto_varpool_encoder_deref (lto_varpool_encoder_t, int);
> int lto_varpool_encoder_lookup (lto_varpool_encoder_t, struct varpool_node *);
> lto_varpool_encoder_t lto_varpool_encoder_new (void);
>
>
--
Richard Guenther <rguenther@suse.de>
Novell / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 - GF: Markus Rex