This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Streaming of optimization summaries in ipa-reference


Hi,
this patch adds streaming of the optimization summary to ipa-reference and
enables it with WHOPR.  (So the remaining IPA pass to enable is ipa-cp.  The
others are all small IPA passes that will probably go into ltrans units and,
with the support of a future sane WPA partitioner, will probably be relatively
effective before we turn them into real IPA passes.)

I also had to move the ltrans boundary computation out of output_cgraph.  The
reason is that the encoder needs to be populated in a particular order so that
clones appear after the function they are cloned from.  When we encoded
ipa-reference first, the encoder was already populated, but in random order.

To make the information useful at ltrans, we need to tell tree-ssa-alias to
check public variables as well, since during partitioning some previously
static vars may become hidden, but we still retain the info about their use.

I also managed to create a testcase based on __builtin_constant_p.  It would be
more natural to scan the dump files of ltrans, but it seems that this is quite a
plausible way to handle some cases too, so I will add new testcases for other
stuff too.

Bootstrapped/regtested x86_64-linux, OK for the LTO streaming and aliasing
bits?  I also verified that the file size effect on WPA units for gamess is not
earthshaking: we need 169353304 bytes instead of 168245328 bytes.
(gamess is known to exercise a degenerate case of ipa-reference.)

Honza
	* ipa-reference.c: Include toplev.h
	(is_proper_for_analysis): Only add to all_module_statics
	if it is allocated.
	(write_node_summary_p, stream_out_bitmap,
	ipa_reference_write_optimization_summary,
	ipa_reference_read_optimization_summary): New.
	(struct ipa_opt_pass_d pass_ipa_reference): Add
	optimization summary streaming.
	* lto-cgraph.c (referenced_from_this_partition_p, 
	reachable_from_this_partition_p): New functions.
	(compute_ltrans_boundary): Break out from ...
	(output_cgraph): ... here.
	* tree-ssa-alias.c (ref_maybe_used_by_call_p_1,
	call_may_clobber_ref_p_1): Ask ipa-reference even for public vars.
	* opts.c (decode_options): Enable ipa_reference.
	* Makefile.in (ipa-reference.o): Add toplev.h dependency.
	* lto-streamer.h (referenced_from_this_partition_p,
	reachable_from_this_partition_p): Declare.
	(compute_ltrans_boundary): Declare.
	* passes.c (ipa_write_summaries_1, ipa_write_optimization_summaries):
	Use compute_ltrans_boundary instead of allocating empty encoder.

	* testsuite/gcc.dg/lto/ipareference_1.c: New file.
	* testsuite/gcc.dg/lto/ipareference_2.c: New file.
Index: ipa-reference.c
===================================================================
--- ipa-reference.c	(revision 159430)
+++ ipa-reference.c	(working copy)
@@ -59,6 +59,7 @@ along with GCC; see the file COPYING3.  
 #include "diagnostic.h"
 #include "langhooks.h"
 #include "lto-streamer.h"
+#include "toplev.h"
 
 static void remove_node_data (struct cgraph_node *node,
 			      void *data ATTRIBUTE_UNUSED);
@@ -272,7 +273,8 @@ is_proper_for_analysis (tree t)
 
   /* This is a variable we care about.  Check if we have seen it
      before, and if not add it the set of variables we care about.  */
-  if (!bitmap_bit_p (all_module_statics, DECL_UID (t)))
+  if (all_module_statics
+      && !bitmap_bit_p (all_module_statics, DECL_UID (t)))
     add_static_var (t);
 
   return true;
@@ -884,6 +886,208 @@ propagate (void)
   return 0;
 }
 
+/* Return true if we need to write summary of NODE. */
+
+static bool
+write_node_summary_p (struct cgraph_node *node,
+		      cgraph_node_set set,
+		      varpool_node_set vset,
+		      bitmap ltrans_statics)
+{
+  ipa_reference_optimization_summary_t info;
+
+  /* See if we have (non-empty) info.  */
+  if (!node->analyzed || node->global.inlined_to)
+    return false;
+  info = get_reference_optimization_summary (node);
+  if (!info || (bitmap_empty_p (info->statics_not_read)
+		&& bitmap_empty_p (info->statics_not_written)))
+    return false;
+
+  /* See if we want to encode it.
+     Encode also referenced functions since constant folding might turn it into
+     a direct call.
+
+     In future we might also want to include summaries of functions references
+     by initializers of constant variables references in current unit.  */
+  if (!reachable_from_this_partition_p (node, set)
+      && !referenced_from_this_partition_p (&node->ref_list, set, vset))
+    return false;
+
+  /* See if the info has non-empty intersections with vars we want to encode.  */
+  if (!bitmap_intersect_p (info->statics_not_read, ltrans_statics)
+      && !bitmap_intersect_p (info->statics_not_written, ltrans_statics))
+    return false;
+  return true;
+}
+
+/* Stream out BITS&LTRANS_STATICS as list of decls to OB.  */
+
+static void
+stream_out_bitmap (struct lto_simple_output_block *ob,
+		   bitmap bits, bitmap ltrans_statics)
+{
+  unsigned int count = 0;
+  unsigned int index;
+  bitmap_iterator bi;
+  EXECUTE_IF_AND_IN_BITMAP (bits, ltrans_statics, 0, index, bi)
+    count ++;
+  lto_output_uleb128_stream (ob->main_stream, count);
+  if (!count)
+    return;
+  EXECUTE_IF_AND_IN_BITMAP (bits, ltrans_statics, 0, index, bi)
+    {
+      tree decl = (tree)splay_tree_lookup (reference_vars_to_consider, index)->value;
+      lto_output_var_decl_index(ob->decl_state, ob->main_stream, decl);
+    }
+}
+
+/* Serialize the ipa info for lto.  */
+
+static void
+ipa_reference_write_optimization_summary (cgraph_node_set set,
+					  varpool_node_set vset)
+{
+  struct cgraph_node *node;
+  struct varpool_node *vnode;
+  struct lto_simple_output_block *ob
+    = lto_create_simple_output_block (LTO_section_ipa_reference);
+  unsigned int count = 0;
+  lto_cgraph_encoder_t encoder = ob->decl_state->cgraph_node_encoder;
+  bitmap ltrans_statics = BITMAP_ALLOC (NULL);
+
+  reference_vars_to_consider = splay_tree_new (splay_tree_compare_ints, 0, 0);
+
+  /* See what variables we are interested in.  */
+  for (vnode = varpool_nodes; vnode; vnode = vnode->next)
+    if (referenced_from_this_partition_p (&vnode->ref_list, set, vset))
+      {
+	tree decl = vnode->decl;
+	if (is_proper_for_analysis (decl))
+	  {
+	    bitmap_set_bit (ltrans_statics, DECL_UID (decl));
+	    splay_tree_insert (reference_vars_to_consider,
+			       DECL_UID (decl), (splay_tree_value)decl);
+	  }
+      }
+
+  for (node = cgraph_nodes; node; node = node->next)
+    if (write_node_summary_p (node, set, vset, ltrans_statics))
+	count++;
+
+  lto_output_uleb128_stream (ob->main_stream, count);
+
+  /* Process all of the functions.  */
+  for (node = cgraph_nodes; node; node = node->next)
+    if (write_node_summary_p (node, set, vset, ltrans_statics))
+      {
+	ipa_reference_optimization_summary_t info;
+	int node_ref;
+
+	info = get_reference_optimization_summary (node);
+	node_ref = lto_cgraph_encoder_encode (encoder, node);
+	lto_output_uleb128_stream (ob->main_stream, node_ref);
+
+	stream_out_bitmap (ob, info->statics_not_read, ltrans_statics);
+	stream_out_bitmap (ob, info->statics_not_written, ltrans_statics);
+      }
+  BITMAP_FREE (ltrans_statics);
+  lto_destroy_simple_output_block (ob);
+  splay_tree_delete (reference_vars_to_consider);
+}
+
+/* Deserialize the ipa info for lto.  */
+
+static void
+ipa_reference_read_optimization_summary (void)
+{
+  struct lto_file_decl_data ** file_data_vec
+    = lto_get_file_decl_data ();
+  struct lto_file_decl_data * file_data;
+  unsigned int j = 0;
+  bitmap_obstack_initialize (&optimization_summary_obstack);
+
+  node_removal_hook_holder =
+      cgraph_add_node_removal_hook (&remove_node_data, NULL);
+  node_duplication_hook_holder =
+      cgraph_add_node_duplication_hook (&duplicate_node_data, NULL);
+
+  while ((file_data = file_data_vec[j++]))
+    {
+      const char *data;
+      size_t len;
+      struct lto_input_block *ib
+	= lto_create_simple_input_block (file_data,
+					 LTO_section_ipa_reference,
+					 &data, &len);
+      if (ib)
+	{
+	  unsigned int i;
+	  unsigned int f_count = lto_input_uleb128 (ib);
+
+	  for (i = 0; i < f_count; i++)
+	    {
+	      unsigned int j, index;
+	      struct cgraph_node *node;
+	      ipa_reference_optimization_summary_t info;
+	      int v_count;
+	      lto_cgraph_encoder_t encoder;
+
+	      index = lto_input_uleb128 (ib);
+	      encoder = file_data->cgraph_node_encoder;
+	      node = lto_cgraph_encoder_deref (encoder, index);
+	      info = XCNEW (struct ipa_reference_optimization_summary_d);
+	      set_reference_optimization_summary (node, info);
+	      info->statics_not_read = BITMAP_ALLOC (&optimization_summary_obstack);
+	      info->statics_not_written = BITMAP_ALLOC (&optimization_summary_obstack);
+	      if (dump_file)
+		fprintf (dump_file,
+			 "\nFunction name:%s/%i:\n  static not read:",
+			 cgraph_node_name (node), node->uid);
+
+	      /* Set the statics not read.  */
+	      v_count = lto_input_uleb128 (ib);
+	      for (j = 0; j < (unsigned int)v_count; j++)
+		{
+		  unsigned int var_index = lto_input_uleb128 (ib);
+		  tree v_decl = lto_file_decl_data_get_var_decl (file_data,
+								 var_index);
+		  bitmap_set_bit (info->statics_not_read, DECL_UID (v_decl));
+		  if (dump_file)
+		    fprintf (dump_file, " %s",
+			     lang_hooks.decl_printable_name (v_decl, 2));
+		}
+
+	      if (dump_file)
+		fprintf (dump_file,
+			 "\n  static not written:");
+	      /* Set the statics not written.  */
+	      v_count = lto_input_uleb128 (ib);
+	      for (j = 0; j < (unsigned int)v_count; j++)
+		{
+		  unsigned int var_index = lto_input_uleb128 (ib);
+		  tree v_decl = lto_file_decl_data_get_var_decl (file_data,
+								 var_index);
+		  bitmap_set_bit (info->statics_not_written, DECL_UID (v_decl));
+		  if (dump_file)
+		    fprintf (dump_file, " %s",
+			     lang_hooks.decl_printable_name (v_decl, 2));
+		}
+	      if (dump_file)
+		fprintf (dump_file, "\n");
+	    }
+
+	  lto_destroy_simple_input_block (file_data,
+					  LTO_section_ipa_reference,
+					  ib, data, len);
+	}
+      else
+	/* Fatal error here.  We do not want to support compiling ltrans units with
+	   different version of compiler or different flags than the WPA unit, so
+	   this should never happen.  */
+	fatal_error ("ipa reference summary is missing in ltrans unit");
+    }
+}
 
 static bool
 gate_reference (void)
@@ -913,8 +1117,8 @@ struct ipa_opt_pass_d pass_ipa_reference
  NULL,				        /* generate_summary */
  NULL,					/* write_summary */
  NULL,				 	/* read_summary */
- NULL,					/* write_optimization_summary */
- NULL,					/* read_optimization_summary */
+ ipa_reference_write_optimization_summary,/* write_optimization_summary */
+ ipa_reference_read_optimization_summary,/* read_optimization_summary */
  NULL,					/* stmt_fixup */
  0,					/* TODOs */
  NULL,			                /* function_transform */
Index: testsuite/gcc.dg/lto/ipareference_2.c
===================================================================
--- testsuite/gcc.dg/lto/ipareference_2.c	(revision 0)
+++ testsuite/gcc.dg/lto/ipareference_2.c	(revision 0)
@@ -0,0 +1,14 @@
+static int val;
+int set_val (void)
+{
+  val = 5;
+}
+int get_val (void)
+{
+  return val;
+}
+__attribute__ ((__noinline__))
+do_nothing ()
+{
+  asm volatile ("":::"memory");
+}
Index: testsuite/gcc.dg/lto/ipareference_1.c
===================================================================
--- testsuite/gcc.dg/lto/ipareference_1.c	(revision 0)
+++ testsuite/gcc.dg/lto/ipareference_1.c	(revision 0)
@@ -0,0 +1,26 @@
+/* { dg-lto-options {{ -O1 -fwhopr }} } */
+/* { dg-lto-do run } */
+
+/* Test that ipa-reference notices that get_val will not change since do_nothing does not
+   modify anything.  This needs streaming of summaries across file boundaries.  */
+extern int get_val (void);
+extern int set_val (void);
+extern do_nothing (void);
+void abort (void);
+main()
+{
+  int a;
+  int b;
+  set_val ();
+  a = get_val ();
+  do_nothing();
+  b = get_val ();
+  if (a==b)
+    {
+      if (!__builtin_constant_p (a==b))
+	abort ();
+      return 0;
+    }
+  else
+    abort ();
+}
Index: lto-cgraph.c
===================================================================
--- lto-cgraph.c	(revision 159429)
+++ lto-cgraph.c	(working copy)
@@ -302,6 +302,7 @@ lto_output_edge (struct lto_simple_outpu
 }
 
 /* Return if LIST contain references from other partitions.  */
+
 bool
 referenced_from_other_partition_p (struct ipa_ref_list *list, cgraph_node_set set,
 				   varpool_node_set vset)
@@ -341,6 +342,47 @@ reachable_from_other_partition_p (struct
   return false;
 }
 
+/* Return true if LIST contains references from this partition (SET/VSET).  */
+
+bool
+referenced_from_this_partition_p (struct ipa_ref_list *list, cgraph_node_set set,
+				  varpool_node_set vset)
+{
+  int i;
+  struct ipa_ref *ref;
+  for (i = 0; ipa_ref_list_refering_iterate (list, i, ref); i++)
+    {
+      if (ref->refering_type == IPA_REF_CGRAPH)
+	{
+	  if (cgraph_node_in_set_p (ipa_ref_refering_node (ref), set))
+	    return true;
+	}
+      else
+	{
+	  if (varpool_node_in_set_p (ipa_ref_refering_varpool_node (ref),
+				     vset))
+	    return true;
+	}
+    }
+  return false;
+}
+
+/* Return true when NODE is reachable (called) from this partition (SET).  */
+
+bool
+reachable_from_this_partition_p (struct cgraph_node *node, cgraph_node_set set)
+{
+  struct cgraph_edge *e;
+  if (!node->analyzed)
+    return false;
+  if (node->global.inlined_to)
+    return false;
+  for (e = node->callers; e; e = e->next_caller)
+    if (cgraph_node_in_set_p (e->caller, set))
+      return true;
+  return false;
+}
+
 /* Output the cgraph NODE to OB.  ENCODER is used to find the
    reference number of NODE->inlined_to.  SET is the set of nodes we
    are writing to the current file.  If NODE is not in SET, then NODE
@@ -694,38 +736,22 @@ output_refs (cgraph_node_set set, varpoo
   lto_destroy_simple_output_block (ob);
 }
 
-
-/* Output the part of the cgraph in SET.  */
-
+/* Find out all cgraph and varpool nodes we want to encode in current unit
+   and insert them to encoders.  */
 void
-output_cgraph (cgraph_node_set set, varpool_node_set vset)
+compute_ltrans_boundary (struct lto_out_decl_state *state,
+			 cgraph_node_set set, varpool_node_set vset)
 {
   struct cgraph_node *node;
-  struct lto_simple_output_block *ob;
   cgraph_node_set_iterator csi;
   varpool_node_set_iterator vsi;
   struct cgraph_edge *edge;
-  int i, n_nodes;
-  bitmap written_decls;
+  int i;
   lto_cgraph_encoder_t encoder;
   lto_varpool_encoder_t varpool_encoder;
-  struct cgraph_asm_node *can;
-
-  ob = lto_create_simple_output_block (LTO_section_cgraph);
-
-  output_profile_summary (ob);
 
-  /* An encoder for cgraph nodes should have been created by
-     ipa_write_summaries_1.  */
-  gcc_assert (ob->decl_state->cgraph_node_encoder);
-  gcc_assert (ob->decl_state->varpool_node_encoder);
-  encoder = ob->decl_state->cgraph_node_encoder;
-  varpool_encoder = ob->decl_state->varpool_node_encoder;
-
-  /* The FUNCTION_DECLs for which we have written a node.  The first
-     node found is written as the "original" node, the remaining nodes
-     are considered its clones.  */
-  written_decls = lto_bitmap_alloc ();
+  encoder = state->cgraph_node_encoder = lto_cgraph_encoder_new ();
+  varpool_encoder = state->varpool_node_encoder = lto_varpool_encoder_new ();
 
   /* Go over all the nodes in SET and assign references.  */
   for (csi = csi_start (set); !csi_end_p (csi); csi_next (&csi))
@@ -775,6 +801,37 @@ output_cgraph (cgraph_node_set set, varp
 	    }
 	}
     }
+}
+
+/* Output the part of the cgraph in SET.  */
+
+void
+output_cgraph (cgraph_node_set set, varpool_node_set vset)
+{
+  struct cgraph_node *node;
+  struct lto_simple_output_block *ob;
+  cgraph_node_set_iterator csi;
+  int i, n_nodes;
+  bitmap written_decls;
+  lto_cgraph_encoder_t encoder;
+  lto_varpool_encoder_t varpool_encoder;
+  struct cgraph_asm_node *can;
+
+  ob = lto_create_simple_output_block (LTO_section_cgraph);
+
+  output_profile_summary (ob);
+
+  /* An encoder for cgraph nodes should have been created by
+     ipa_write_summaries_1.  */
+  gcc_assert (ob->decl_state->cgraph_node_encoder);
+  gcc_assert (ob->decl_state->varpool_node_encoder);
+  encoder = ob->decl_state->cgraph_node_encoder;
+  varpool_encoder = ob->decl_state->varpool_node_encoder;
+
+  /* The FUNCTION_DECLs for which we have written a node.  The first
+     node found is written as the "original" node, the remaining nodes
+     are considered its clones.  */
+  written_decls = lto_bitmap_alloc ();
 
   /* Write out the nodes.  We must first output a node and then its clones,
      otherwise at a time reading back the node there would be nothing to clone
Index: tree-ssa-alias.c
===================================================================
--- tree-ssa-alias.c	(revision 159429)
+++ tree-ssa-alias.c	(working copy)
@@ -1061,8 +1061,7 @@ ref_maybe_used_by_call_p_1 (gimple call,
   /* Check if base is a global static variable that is not read
      by the function.  */
   if (TREE_CODE (base) == VAR_DECL
-      && TREE_STATIC (base)
-      && !TREE_PUBLIC (base))
+      && TREE_STATIC (base))
     {
       bitmap not_read;
 
@@ -1316,8 +1315,7 @@ call_may_clobber_ref_p_1 (gimple call, a
      by the function.  */
   if (callee != NULL_TREE
       && TREE_CODE (base) == VAR_DECL
-      && TREE_STATIC (base)
-      && !TREE_PUBLIC (base))
+      && TREE_STATIC (base))
     {
       bitmap not_written;
 
Index: opts.c
===================================================================
--- opts.c	(revision 159429)
+++ opts.c	(working copy)
@@ -1143,7 +1143,6 @@ decode_options (unsigned int argc, const
     {
       /* These passes are not WHOPR compatible yet.  */
       flag_ipa_cp = 0;
-      flag_ipa_reference = 0;
       flag_ipa_type_escape = 0;
       flag_ipa_pta = 0;
       flag_ipa_struct_reorg = 0;
Index: Makefile.in
===================================================================
--- Makefile.in	(revision 159430)
+++ Makefile.in	(working copy)
@@ -2929,7 +2929,7 @@ ipa-reference.o : ipa-reference.c $(CONF
    coretypes.h $(TM_H) $(TREE_H) $(TREE_FLOW_H) $(TREE_INLINE_H) langhooks.h \
    pointer-set.h $(GGC_H) $(IPA_REFERENCE_H) $(IPA_UTILS_H) $(SPLAY_TREE_H) \
    $(GIMPLE_H) $(CGRAPH_H) output.h $(FLAGS_H) $(TREE_PASS_H) \
-   $(TIMEVAR_H) $(DIAGNOSTIC_H) $(FUNCTION_H) $(LTO_STREAMER_H)
+   $(TIMEVAR_H) $(DIAGNOSTIC_H) $(FUNCTION_H) $(LTO_STREAMER_H) $(TOPLEV_H)
 ipa-pure-const.o : ipa-pure-const.c $(CONFIG_H) $(SYSTEM_H) \
    coretypes.h $(TM_H) $(TREE_H) $(TREE_FLOW_H) $(TREE_INLINE_H) langhooks.h \
    pointer-set.h $(GGC_H) $(IPA_UTILS_H) $(TARGET_H) \
Index: passes.c
===================================================================
--- passes.c	(revision 159429)
+++ passes.c	(working copy)
@@ -1695,8 +1695,7 @@ static void
 ipa_write_summaries_1 (cgraph_node_set set, varpool_node_set vset)
 {
   struct lto_out_decl_state *state = lto_new_out_decl_state ();
-  state->cgraph_node_encoder = lto_cgraph_encoder_new ();
-  state->varpool_node_encoder = lto_varpool_encoder_new ();
+  compute_ltrans_boundary (state, set, vset);
 
   lto_push_out_decl_state (state);
 
@@ -1809,8 +1808,8 @@ void
 ipa_write_optimization_summaries (cgraph_node_set set, varpool_node_set vset)
 {
   struct lto_out_decl_state *state = lto_new_out_decl_state ();
-  state->cgraph_node_encoder = lto_cgraph_encoder_new ();
-  state->varpool_node_encoder = lto_varpool_encoder_new ();
+  compute_ltrans_boundary (state, set, vset);
+
   lto_push_out_decl_state (state);
 
   gcc_assert (flag_wpa);
Index: lto-streamer.h
===================================================================
--- lto-streamer.h	(revision 159429)
+++ lto-streamer.h	(working copy)
@@ -865,6 +865,13 @@ bool referenced_from_other_partition_p (
 				        varpool_node_set vset);
 bool reachable_from_other_partition_p (struct cgraph_node *,
 				       cgraph_node_set);
+bool referenced_from_this_partition_p (struct ipa_ref_list *,
+				        cgraph_node_set,
+				        varpool_node_set vset);
+bool reachable_from_this_partition_p (struct cgraph_node *,
+				       cgraph_node_set);
+void compute_ltrans_boundary (struct lto_out_decl_state *state,
+			      cgraph_node_set, varpool_node_set);
 
 
 /* In lto-symtab.c.  */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]