This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC PATCH] cgraph support for late declare variant resolution


Hi!

The following patch is a so far non-working attempt at
#pragma omp declare variant
late resolution.
To sum up, during gimplification we resolve direct calls to their
variants whenever possible.
There can be some cases that can't be resolved at that point yet,
e.g. when some context depends on whether the function will be
a declare simd clone or not (only created during IPA), or on
whether it will be offloaded or not (also split during IPA).
This patch creates a dummy static function that is made noipa and
has an error_mark_node body so we don't try to do anything with it,
attaches to it control structures that contain the possible
variants together with their scores, and sets the
node->declare_variant_alt flag on it.  Rebuilding of cgraph edges sets
caller->calls_declare_variant_alt for anything that calls such functions,
so that post IPA we can decide.
LTO streaming of this isn't implemented yet.
As discussed on IRC, the extra payload, which is quite rare, is not added to
cgraph_node; only 2 bits are actually used there, and the rest is kept in an
on-the-side hash table.  I haven't used function_summary for that, because a
simple hash table looked simpler.

I was hoping that the
       (*slot)->node->create_reference (varentry->variant, IPA_REF_ADDR);
calls would make sure that the variant cgraph_nodes are considered needed,
but apparently they are removed.

The structures could perhaps be moved to cgraph.h, and omp-general.c
could have a function to look them up in the hash table, if cgraph code
needed to access the variants for some reason.

Testcase I was using (-O2 -fopenmp -mno-sse3):
/* { dg-do compile { target vect_simd_clones } } */
/* { dg-additional-options "-mno-sse3" { target { i?86-*-* x86_64-*-* } } } */

int f01 (int);
int f02 (int);
int f03 (int);
#pragma omp declare variant (f01) match (device={isa("avx512f")}) /* 4 or 8 */
#pragma omp declare variant (f02) match (implementation={vendor(score(3):gnu)},device={kind(cpu)}) /* (1 or 2) + 3 */
#pragma omp declare variant (f03) match (implementation={vendor(score(5):gnu)},device={kind(host)}) /* (1 or 2) + 5 */
int f04 (int x) { return x; }

#pragma omp declare simd
int
test1 (int x)
{
  int a = f04 (x);
  int b = f04 (x);
  return a + b;
}
where I'd like to see test1 calling f03 with score 1 + 5 + 1, most of the
test1.simdclone.* clones (the non-avx512f ones) also calling f03 with score
2 + 5 + 1, and the last two test1.simdclone.* clones (the avx512f ones)
calling f01 (score 8 + 1).
But the f01/f02/f03 cgraph_nodes are removed and in their place there are
test1.simdclone.* cgraph nodes.

Thoughts on this?  Any other comments?

--- gcc/cgraph.c.jj	2019-11-13 10:54:45.283048134 +0100
+++ gcc/cgraph.c	2019-11-15 22:10:58.845671689 +0100
@@ -890,6 +890,7 @@ symbol_table::create_edge (cgraph_node *
 				      caller->decl);
   else
     edge->in_polymorphic_cdtor = caller->thunk.thunk_p;
+  caller->calls_declare_variant_alt |= callee->declare_variant_alt;
 
   return edge;
 }
--- gcc/Makefile.in.jj	2019-11-14 01:20:24.735562492 +0100
+++ gcc/Makefile.in	2019-11-15 15:41:59.908723643 +0100
@@ -2577,6 +2577,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h
   $(srcdir)/omp-offload.h \
   $(srcdir)/omp-offload.c \
   $(srcdir)/omp-expand.c \
+  $(srcdir)/omp-general.c \
   $(srcdir)/omp-low.c \
   $(srcdir)/targhooks.c $(out_file) $(srcdir)/passes.c $(srcdir)/cgraphunit.c \
   $(srcdir)/cgraphclones.c \
--- gcc/omp-offload.c.jj	2019-09-26 22:02:53.737255157 +0200
+++ gcc/omp-offload.c	2019-11-15 22:44:07.925718820 +0100
@@ -1893,12 +1893,28 @@ execute_omp_device_lower ()
   bool regimplify = false;
   basic_block bb;
   gimple_stmt_iterator gsi;
+  bool calls_declare_variant_alt
+    = cgraph_node::get (cfun->decl)->calls_declare_variant_alt;
   FOR_EACH_BB_FN (bb, cfun)
     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
       {
 	gimple *stmt = gsi_stmt (gsi);
-	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
+	if (!is_gimple_call (stmt))
 	  continue;
+	if (!gimple_call_internal_p (stmt))
+	  {
+	    if (calls_declare_variant_alt)
+	      if (tree fndecl = gimple_call_fndecl (stmt))
+		{
+		  tree new_fndecl = omp_resolve_declare_variant (fndecl);
+		  if (new_fndecl != fndecl)
+		    {
+		      gimple_call_set_fndecl (stmt, new_fndecl);
+		      update_stmt (stmt);
+		    }
+		}
+	    continue;
+	  }
 	tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
 	tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
 	switch (gimple_call_internal_fn (stmt))
@@ -1992,7 +2008,9 @@ public:
   /* opt_pass methods: */
   virtual bool gate (function *fun)
     {
-      return !(fun->curr_properties & PROP_gimple_lomp_dev);
+      return (!(fun->curr_properties & PROP_gimple_lomp_dev)
+	      || (flag_openmp
+		  && cgraph_node::get (fun->decl)->calls_declare_variant_alt));
     }
   virtual unsigned int execute (function *)
     {
--- gcc/cgraph.h.jj	2019-11-11 08:02:16.146654739 +0100
+++ gcc/cgraph.h	2019-11-15 22:09:43.919800165 +0100
@@ -1486,6 +1486,11 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cg
   unsigned tm_may_enter_irr : 1;
   /* True if this was a clone created by ipa-cp.  */
   unsigned ipcp_clone : 1;
+  /* True if this is the deferred declare variant resolution artificial
+     function.  */
+  unsigned declare_variant_alt : 1;
+  /* True if the function calls declare_variant_alt functions.  */
+  unsigned calls_declare_variant_alt : 1;
 
 private:
   /* Unique id of the node.  */
--- gcc/omp-general.c.jj	2019-11-15 09:31:56.085202778 +0100
+++ gcc/omp-general.c	2019-11-15 23:30:58.398420576 +0100
@@ -642,6 +642,8 @@ omp_maybe_offloaded (void)
   if (symtab->state == PARSING)
     /* Maybe.  */
     return true;
+  if (cfun && cfun->after_inlining)
+    return false;
   if (current_function_decl
       && lookup_attribute ("omp declare target",
 			   DECL_ATTRIBUTES (current_function_decl)))
@@ -694,8 +696,7 @@ omp_context_selector_matches (tree ctx)
 	     (so in most of the cases), and we'd need to maintain set of
 	     surrounding OpenMP constructs, which is better handled during
 	     gimplification.  */
-	  if (symtab->state == PARSING
-	      || (cfun->curr_properties & PROP_gimple_any) != 0)
+	  if (symtab->state == PARSING)
 	    {
 	      ret = -1;
 	      continue;
@@ -704,6 +705,28 @@ omp_context_selector_matches (tree ctx)
 	  enum tree_code constructs[5];
 	  int nconstructs
 	    = omp_constructor_traits_to_codes (TREE_VALUE (t1), constructs);
+
+	  if ((cfun->curr_properties & PROP_gimple_any) != 0)
+	    {
+	      if (!cfun->after_inlining)
+		{
+		  ret = -1;
+		  continue;
+		}
+	      int i;
+	      for (i = 0; i < nconstructs; ++i)
+		if (constructs[i] == OMP_SIMD)
+		  break;
+	      if (i < nconstructs)
+		{
+		  ret = -1;
+		  continue;
+		}
+	      /* If there is no simd, assume it is ok after IPA,
+		 constructs should have been checked before.  */
+	      continue;
+	    }
+
 	  int r = omp_construct_selector_matches (constructs, nconstructs,
 						  NULL);
 	  if (r == 0)
@@ -738,6 +761,9 @@ omp_context_selector_matches (tree ctx)
 	    case 'a':
 	      if (set == 'i' && !strcmp (sel, "atomic_default_mem_order"))
 		{
+		  if ((cfun->curr_properties & PROP_gimple_any) != 0)
+		    break;
+
 		  enum omp_memory_order omo
 		    = ((enum omp_memory_order)
 		       (omp_requires_mask
@@ -816,6 +842,9 @@ omp_context_selector_matches (tree ctx)
 	    case 'u':
 	      if (set == 'i' && !strcmp (sel, "unified_address"))
 		{
+		  if ((cfun->curr_properties & PROP_gimple_any) != 0)
+		    break;
+
 		  if ((omp_requires_mask & OMP_REQUIRES_UNIFIED_ADDRESS) == 0)
 		    {
 		      if (symtab->state == PARSING)
@@ -827,6 +856,9 @@ omp_context_selector_matches (tree ctx)
 		}
 	      if (set == 'i' && !strcmp (sel, "unified_shared_memory"))
 		{
+		  if ((cfun->curr_properties & PROP_gimple_any) != 0)
+		    break;
+
 		  if ((omp_requires_mask
 		       & OMP_REQUIRES_UNIFIED_SHARED_MEMORY) == 0)
 		    {
@@ -841,6 +873,9 @@ omp_context_selector_matches (tree ctx)
 	    case 'd':
 	      if (set == 'i' && !strcmp (sel, "dynamic_allocators"))
 		{
+		  if ((cfun->curr_properties & PROP_gimple_any) != 0)
+		    break;
+
 		  if ((omp_requires_mask
 		       & OMP_REQUIRES_DYNAMIC_ALLOCATORS) == 0)
 		    {
@@ -855,6 +890,9 @@ omp_context_selector_matches (tree ctx)
 	    case 'r':
 	      if (set == 'i' && !strcmp (sel, "reverse_offload"))
 		{
+		  if ((cfun->curr_properties & PROP_gimple_any) != 0)
+		    break;
+
 		  if ((omp_requires_mask & OMP_REQUIRES_REVERSE_OFFLOAD) == 0)
 		    {
 		      if (symtab->state == PARSING)
@@ -944,7 +982,8 @@ omp_context_selector_matches (tree ctx)
 			   #pragma omp declare simd on it, some simd clones
 			   might have the isa added later on.  */
 			if (r == -1
-			    && targetm.simd_clone.compute_vecsize_and_simdlen)
+			    && targetm.simd_clone.compute_vecsize_and_simdlen
+			    && (cfun == NULL || !cfun->after_inlining))
 			  {
 			    tree attrs
 			      = DECL_ATTRIBUTES (current_function_decl);
@@ -1415,6 +1454,191 @@ omp_context_compute_score (tree ctx, wid
   return ret;
 }
 
+/* Class describing a single variant.  */
+struct GTY(()) omp_declare_variant_entry {
+  /* NODE of the variant.  */
+  cgraph_node *variant;
+  /* Score if not in declare simd clone.  */
+  widest_int score;
+  /* Score if in declare simd clone.  */
+  widest_int score_in_declare_simd_clone;
+  /* Context selector for the variant.  */
+  tree ctx;
+  /* True if the context selector is known to match already.  */
+  bool matches;
+};
+
+/* Class describing a function with variants.  */
+struct GTY((for_user)) omp_declare_variant_base_entry {
+  /* NODE of the base function.  */
+  cgraph_node *base;
+  /* NODE of the artificial function created for the deferred variant
+     resolution.  */
+  cgraph_node *node;
+  /* Vector of the variants.  */
+  vec<omp_declare_variant_entry, va_gc> *variants;
+};
+
+struct omp_declare_variant_hasher
+  : ggc_ptr_hash<omp_declare_variant_base_entry> {
+  static hashval_t hash (omp_declare_variant_base_entry *);
+  static bool equal (omp_declare_variant_base_entry *,
+		     omp_declare_variant_base_entry *);
+};
+
+hashval_t
+omp_declare_variant_hasher::hash (omp_declare_variant_base_entry *x)
+{
+  inchash::hash hstate;
+  hstate.add_int (DECL_UID (x->base->decl));
+  hstate.add_int (x->variants->length ());
+  omp_declare_variant_entry *variant;
+  unsigned int i;
+  FOR_EACH_VEC_SAFE_ELT (x->variants, i, variant)
+    {
+      hstate.add_int (DECL_UID (variant->variant->decl));
+      hstate.add_wide_int (variant->score);
+      hstate.add_wide_int (variant->score_in_declare_simd_clone);
+      hstate.add_ptr (variant->ctx);
+      hstate.add_int (variant->matches);
+    }
+  return hstate.end ();
+}
+
+bool
+omp_declare_variant_hasher::equal (omp_declare_variant_base_entry *x,
+				   omp_declare_variant_base_entry *y)
+{
+  if (x->base != y->base
+      || x->variants->length () != y->variants->length ())
+    return false;
+  omp_declare_variant_entry *variant;
+  unsigned int i;
+  FOR_EACH_VEC_SAFE_ELT (x->variants, i, variant)
+    if (variant->variant != (*y->variants)[i].variant
+	|| variant->score != (*y->variants)[i].score
+	|| (variant->score_in_declare_simd_clone
+	    != (*y->variants)[i].score_in_declare_simd_clone)
+	|| variant->ctx != (*y->variants)[i].ctx
+	|| variant->matches != (*y->variants)[i].matches)
+      return false;
+  return true;
+}
+
+static GTY(()) hash_table<omp_declare_variant_hasher> *omp_declare_variants;
+
+struct omp_declare_variant_alt_hasher
+  : ggc_ptr_hash<omp_declare_variant_base_entry> {
+  static hashval_t hash (omp_declare_variant_base_entry *);
+  static bool equal (omp_declare_variant_base_entry *,
+		     omp_declare_variant_base_entry *);
+};
+
+hashval_t
+omp_declare_variant_alt_hasher::hash (omp_declare_variant_base_entry *x)
+{
+  return DECL_UID (x->node->decl);
+}
+
+bool
+omp_declare_variant_alt_hasher::equal (omp_declare_variant_base_entry *x,
+				       omp_declare_variant_base_entry *y)
+{
+  return x->node == y->node;
+}
+
+static GTY(()) hash_table<omp_declare_variant_alt_hasher>
+  *omp_declare_variant_alt;
+
+/* Try to resolve declare variant after gimplification.  */
+
+static tree
+omp_resolve_late_declare_variant (tree alt)
+{
+  cgraph_node *node = cgraph_node::get (alt);
+  cgraph_node *cur_node = cgraph_node::get (cfun->decl);
+  if (node == NULL
+      || !node->declare_variant_alt
+      || !cfun->after_inlining)
+    return alt;
+
+  omp_declare_variant_base_entry entry;
+  entry.base = NULL;
+  entry.node = node;
+  entry.variants = NULL;
+  omp_declare_variant_base_entry *entryp
+    = omp_declare_variant_alt->find_with_hash (&entry, DECL_UID (alt));
+
+  unsigned int i, j;
+  omp_declare_variant_entry *varentry1, *varentry2;
+  auto_vec <bool, 16> matches;
+  unsigned int nmatches = 0;
+  FOR_EACH_VEC_SAFE_ELT (entryp->variants, i, varentry1)
+    {
+      if (varentry1->matches)
+	{
+	  /* This has been checked to be ok already.  */
+	  matches.safe_push (true);
+	  nmatches++;
+	  continue;
+	}
+      switch (omp_context_selector_matches (varentry1->ctx))
+	{
+	case 0:
+          matches.safe_push (false);
+	  break;
+	case -1:
+	  return alt;
+	default:
+	  matches.safe_push (true);
+	  nmatches++;
+	  break;
+	}
+    }
+
+  if (nmatches == 0)
+    return entryp->base->decl;
+
+  /* A context selector that is a strict subset of another context selector
+     has a score of zero.  */
+  FOR_EACH_VEC_SAFE_ELT (entryp->variants, i, varentry1)
+    if (matches[i])
+      {
+        for (j = i + 1;
+	     vec_safe_iterate (entryp->variants, j, &varentry2); ++j)
+	  if (matches[j])
+	    {
+	      int r = omp_context_selector_compare (varentry1->ctx,
+						    varentry2->ctx);
+	      if (r == -1)
+		{
+		  /* ctx1 is a strict subset of ctx2, ignore ctx1.  */
+		  matches[i] = false;
+		  break;
+		}
+	      else if (r == 1)
+		/* ctx2 is a strict subset of ctx1, remove ctx2.  */
+		matches[j] = false;
+	    }
+      }
+
+  widest_int max_score = -1;
+  varentry2 = NULL;
+  FOR_EACH_VEC_SAFE_ELT (entryp->variants, i, varentry1)
+    if (matches[i])
+      {
+	widest_int score
+	  = (cur_node->simdclone ? varentry1->score_in_declare_simd_clone
+	     : varentry1->score);
+	if (score > max_score)
+	  {
+	    max_score = score;
+	    varentry2 = varentry1;
+	  }
+      }
+  return varentry2->variant->decl;
+}
+
 /* Try to resolve declare variant, return the variant decl if it should
    be used instead of base, or base otherwise.  */
 
@@ -1422,6 +1646,9 @@ tree
 omp_resolve_declare_variant (tree base)
 {
   tree variant1 = NULL_TREE, variant2 = NULL_TREE;
+  if (cfun && (cfun->curr_properties & PROP_gimple_any) != 0)
+    return omp_resolve_late_declare_variant (base);
+
   auto_vec <tree, 16> variants;
   auto_vec <bool, 16> defer;
   bool any_deferred = false;
@@ -1459,6 +1686,10 @@ omp_resolve_declare_variant (tree base)
       bool first = true;
       unsigned int i;
       tree attr1, attr2;
+      omp_declare_variant_base_entry entry;
+      entry.base = cgraph_node::get_create (base);
+      entry.node = NULL;
+      vec_alloc (entry.variants, variants.length ());
       FOR_EACH_VEC_ELT (variants, i, attr1)
 	{
 	  widest_int score1;
@@ -1498,6 +1729,14 @@ omp_resolve_declare_variant (tree base)
 		  variant2 = defer[i] ? NULL_TREE : attr1;
 		}
 	    }
+	  omp_declare_variant_entry varentry;
+	  varentry.variant
+	    = cgraph_node::get_create (TREE_PURPOSE (TREE_VALUE (attr1)));
+	  varentry.score = score1;
+	  varentry.score_in_declare_simd_clone = score2;
+	  varentry.ctx = ctx;
+	  varentry.matches = !defer[i];
+	  entry.variants->quick_push (varentry);
 	}
 
       /* If there is a clear winner variant with the score which is not
@@ -1522,17 +1761,66 @@ omp_resolve_declare_variant (tree base)
 		}
 	    }
 	  if (variant1)
-	    return TREE_PURPOSE (TREE_VALUE (variant1));
+	    {
+	      vec_free (entry.variants);
+	      return TREE_PURPOSE (TREE_VALUE (variant1));
+	    }
+	}
+
+      if (omp_declare_variants == NULL)
+	omp_declare_variants
+	  = hash_table<omp_declare_variant_hasher>::create_ggc (64);
+      omp_declare_variant_base_entry **slot
+	= omp_declare_variants->find_slot (&entry, INSERT);
+      if (*slot != NULL)
+	{
+	  vec_free (entry.variants);
+	  return (*slot)->node->decl;
 	}
 
-      return base;
+      *slot = ggc_cleared_alloc<omp_declare_variant_base_entry> ();
+      (*slot)->base = entry.base;
+      (*slot)->node = entry.base;
+      (*slot)->variants = entry.variants;
+      tree alt = build_decl (DECL_SOURCE_LOCATION (base), FUNCTION_DECL,
+			     DECL_NAME (base), TREE_TYPE (base));
+      DECL_ARTIFICIAL (alt) = 1;
+      DECL_IGNORED_P (alt) = 1;
+      TREE_STATIC (alt) = 1;
+      tree attributes = DECL_ATTRIBUTES (base);
+      if (lookup_attribute ("noipa", attributes) == NULL)
+	{
+	  attributes = tree_cons (get_identifier ("noipa"), NULL, attributes);
+	  if (lookup_attribute ("noinline", attributes) == NULL)
+	    attributes = tree_cons (get_identifier ("noinline"), NULL,
+				    attributes);
+	  if (lookup_attribute ("noclone", attributes) == NULL)
+	    attributes = tree_cons (get_identifier ("noclone"), NULL,
+				    attributes);
+	  if (lookup_attribute ("no_icf", attributes) == NULL)
+	    attributes = tree_cons (get_identifier ("no_icf"), NULL,
+				    attributes);
+	}
+      DECL_ATTRIBUTES (alt) = attributes;
+      DECL_INITIAL (alt) = error_mark_node;
+      (*slot)->node = cgraph_node::create (alt);
+      (*slot)->node->declare_variant_alt = 1;
+      omp_declare_variant_entry *varentry;
+      FOR_EACH_VEC_SAFE_ELT (entry.variants, i, varentry)
+	(*slot)->node->create_reference (varentry->variant, IPA_REF_ADDR);
+      if (omp_declare_variant_alt == NULL)
+	omp_declare_variant_alt
+	  = hash_table<omp_declare_variant_alt_hasher>::create_ggc (64);
+      *omp_declare_variant_alt->find_slot_with_hash (*slot, DECL_UID (alt),
+						     INSERT) = *slot;
+      return alt;
     }
 
   if (variants.length () == 1)
     return TREE_PURPOSE (TREE_VALUE (variants[0]));
 
-  /* A context selector that is a strict subset of another context selector has a score
-     of zero.  */
+  /* A context selector that is a strict subset of another context selector
+     has a score of zero.  */
   tree attr1, attr2;
   unsigned int i, j;
   FOR_EACH_VEC_ELT (variants, i, attr1)
@@ -1935,3 +2223,5 @@ oacc_get_ifn_dim_arg (const gimple *stmt
   gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX);
   return (int) axis;
 }
+
+#include "gt-omp-general.h"
--- gcc/omp-simd-clone.c.jj	2019-10-30 10:49:37.182013593 +0100
+++ gcc/omp-simd-clone.c	2019-11-15 23:17:59.335139849 +0100
@@ -477,6 +477,7 @@ simd_clone_create (struct cgraph_node *o
      the old node.  */
   new_node->local = old_node->local;
   new_node->externally_visible = old_node->externally_visible;
+  new_node->calls_declare_variant_alt = old_node->calls_declare_variant_alt;
 
   return new_node;
 }

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]