[RFH] LTO cgraph support for late declare variant resolution

Jakub Jelinek jakub@redhat.com
Thu May 14 12:17:45 GMT 2020


Hi!

I've committed the patch, so that the rest can be handled incrementally.

On Wed, May 13, 2020 at 01:16:42PM +0200, Jakub Jelinek wrote:
> Honza/Martin, are the cgraph related changes acceptable to you?
> 
> For LTO, the patch only saves/restores the two cgraph_node bits added in the
> patch, but doesn't yet stream out and back in the on the side info for the
> declare_variant_alt.  For the LTO partitioning, I believe those artificial
> FUNCTION_DECLs with declare_variant_alt need to go into partition together
> with anything that calls them (possibly duplicated), any way how to achieve
> that?  Say if declare variant artificial fn foobar is directly
> called from all of foo, bar and baz and not from qux and we want 4
> partitions, one for each of foo, bar, baz, qux, then foobar is needed in the
> first 3 partitions, and the IPA_REF_ADDRs recorded for foobar that right
> after IPA the foobar call will be replaced with calls to foobar1, foobar2,
> foobar3 or foobar (non-artificial) can of course stay in different
> partitions if needed.

I've tried to add the saving/restoring next to the ipa refs saving/restoring,
as the declare variant alt stuff is a kind of extension of those.
Unfortunately, the following doesn't compile, because I also need to write or
read a tree there (ctx is a portion of DECL_ATTRIBUTES of the base function),
but the ipa refs write/read back functions don't have arguments that can be
used for that.

Any idea where to do it instead?  For every cgraph_node with
declare_variant_alt set, I need to call a function that writes its entry,
which contains a few other cgraph_nodes (also duplicated in the ipa_refs),
some widest_ints, one tree and some booleans.

Also, do I need to do anything special to avoid LTO merging those artificial
decls?  It is just fine if their ipa refs are merged, but the artificial
vars would be fine only if they are the same (could use the other hash table
for that).

--- gcc/symtab.c.jj	2020-04-20 15:51:19.005560662 +0200
+++ gcc/symtab.c	2020-05-14 12:25:41.530745061 +0200
@@ -1984,7 +1984,7 @@ symtab_node::get_partitioning_class (voi
   if (DECL_ABSTRACT_P (decl))
     return SYMBOL_EXTERNAL;
 
-  if (cnode && cnode->inlined_to)
+  if (cnode && (cnode->inlined_to || cnode->declare_variant_alt))
     return SYMBOL_DUPLICATE;
 
   /* Transparent aliases are always duplicated.  */
--- gcc/lto-cgraph.c.jj	2020-05-14 09:58:21.353412170 +0200
+++ gcc/lto-cgraph.c	2020-05-14 12:39:01.592642219 +0200
@@ -766,6 +766,9 @@ output_refs (lto_symtab_encoder_t encode
 	  for (int i = 0; node->iterate_reference (i, ref); i++)
 	    lto_output_ref (ob, ref, encoder);
 	}
+      if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
+	if (cnode->declare_variant_alt)
+	  omp_lto_output_declare_variant_alt (ob, cnode, encoder);
     }
 
   streamer_write_uhwi_stream (ob->main_stream, 0);
@@ -1610,6 +1613,9 @@ input_refs (class lto_input_block *ib,
 	  input_ref (ib, node, nodes);
 	  count--;
 	}
+      if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
+	if (cnode->declare_variant_alt)
+	  omp_lto_input_declare_variant_alt (ib, cnode, nodes);
     }
 }
 	    
--- gcc/omp-general.c.jj	2020-05-14 09:58:21.394411547 +0200
+++ gcc/omp-general.c	2020-05-14 13:14:09.338841298 +0200
@@ -42,6 +42,8 @@ along with GCC; see the file COPYING3.
 #include "hsa-common.h"
 #include "tree-pass.h"
 #include "omp-device-properties.h"
+#include "data-streamer.h"
+#include "streamer-hooks.h"
 
 enum omp_requires omp_requires_mask;
 
@@ -1898,6 +1900,91 @@ omp_resolve_declare_variant (tree base)
 	  ? TREE_PURPOSE (TREE_VALUE (variant1)) : base);
 }
 
+void /* Write NODE's omp_declare_variant_alt entry to OB->main_stream.  */
+omp_lto_output_declare_variant_alt (lto_simple_output_block *ob,
+				    cgraph_node *node,
+				    lto_symtab_encoder_t encoder)
+{
+  gcc_assert (node->declare_variant_alt);
+  /* Build a key holding only NODE; the table is probed with its DECL_UID.  */
+  omp_declare_variant_base_entry entry;
+  entry.base = NULL;
+  entry.node = node;
+  entry.variants = NULL;
+  omp_declare_variant_base_entry *entryp
+    = omp_declare_variant_alt->find_with_hash (&entry, DECL_UID (node->decl));
+  gcc_assert (entryp);
+  /* Nodes are written as their index in ENCODER, which must contain them.  */
+  int nbase = lto_symtab_encoder_lookup (encoder, entryp->base);
+  gcc_assert (nbase != LCC_NOT_FOUND);
+  streamer_write_hwi_stream (ob->main_stream, nbase);
+  /* NOTE(review): variants is dereferenced here, yet the loop below uses the
      _SAFE_ iterator; confirm variants can never be NULL.  */
+  streamer_write_hwi_stream (ob->main_stream, entryp->variants->length ());
+  /* Per variant: node index, both scores, the context tree, matches flag.  */
+  unsigned int i;
+  omp_declare_variant_entry *varentry;
+  FOR_EACH_VEC_SAFE_ELT (entryp->variants, i, varentry)
+    {
+      int nvar = lto_symtab_encoder_lookup (encoder, varentry->variant);
+      gcc_assert (nvar != LCC_NOT_FOUND);
+      streamer_write_hwi_stream (ob->main_stream, nvar);
+      /* Two passes: score, then score_in_declare_simd_clone (length + words).  */
+      for (widest_int *w = &varentry->score; ;
+	   w = &varentry->score_in_declare_simd_clone)
+	{
+	  unsigned len = w->get_len ();
+	  streamer_write_hwi_stream (ob->main_stream, len);
+	  const HOST_WIDE_INT *val = w->get_val ();
+	  for (unsigned j = 0; j < len; j++)
+	    streamer_write_hwi_stream (ob->main_stream, val[j]);
+	  if (w == &varentry->score_in_declare_simd_clone)
+	    break;
+	}
+      /* NOTE(review): the accompanying message says this tree write does not compile with these arguments.  */
+      stream_write_tree (ob, varentry->ctx, false);
+      streamer_write_hwi_stream (ob->main_stream, varentry->matches);
+    }
+}
+
+void /* Read one declare-variant-alt entry for NODE from IB and record it.  */
+omp_lto_input_declare_variant_alt (lto_input_block *ib, cgraph_node *node,
+				   vec<symtab_node *> nodes)
+{
+  gcc_assert (node->declare_variant_alt);
+  omp_declare_variant_base_entry *entryp
+    = ggc_cleared_alloc<omp_declare_variant_base_entry> ();
+  entryp->base = dyn_cast<cgraph_node *> (nodes[streamer_read_hwi (ib)]); /* NOTE(review): index into NODES is not range-checked.  */
+  entryp->node = node;
+  unsigned int len = streamer_read_hwi (ib); /* Number of variants.  */
+  vec_alloc (entryp->variants, len);
+  /* Fields are read in the order omp_lto_output_declare_variant_alt writes them.  */
+  for (unsigned int i = 0; i < len; i++)
+    {
+      omp_declare_variant_entry varentry;
+      varentry.variant
+	= dyn_cast<cgraph_node *> (nodes[streamer_read_hwi (ib)]);
+      for (widest_int *w = &varentry.score; ; /* Two passes: score first.  */
+	   w = &varentry.score_in_declare_simd_clone)
+	{
+	  unsigned len2 = streamer_read_hwi (ib); /* Word count.  */
+	  HOST_WIDE_INT arr[WIDE_INT_MAX_ELTS];
+	  gcc_assert (len2 <= WIDE_INT_MAX_ELTS); /* Must fit in ARR.  */
+	  for (unsigned int j = 0; j < len2; j++)
+	    arr[j] = streamer_read_hwi (ib);
+	  *w = widest_int::from_array (arr, len2, true);
+	  if (w == &varentry.score_in_declare_simd_clone)
+	    break;
+	}
+      varentry.ctx = stream_read_tree (ib, /*data_in*/NULL); /* NOTE(review): no data_in is available here; the accompanying message says this does not compile.  */
+      varentry.matches = streamer_read_hwi (ib) != 0;
+      entryp->variants->quick_push (varentry); /* Presumably relies on the vec_alloc above for capacity; confirm.  */
+    }
+  if (omp_declare_variant_alt == NULL) /* Create the table on first use.  */
+    omp_declare_variant_alt
+      = hash_table<omp_declare_variant_alt_hasher>::create_ggc (64);
+  *omp_declare_variant_alt->find_slot_with_hash (entryp, DECL_UID (node->decl),
+						 INSERT) = entryp;
+}
 
 /* Encode an oacc launch argument.  This matches the GOMP_LAUNCH_PACK
    macro on gomp-constants.h.  We do not check for overflow.  */


	Jakub



More information about the Gcc-patches mailing list