This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[trans-mem] ipa pass for tm function cloning


Here's a simple pass to clone available functions called within a transaction. The clones are re-written so that all memory accesses are transactionalized.

I'm pretty well baffled by how complicated the IPA hooks are. What I have seems to work, but alternately there seem to be 99 other ways I could have achieved the same effect, and I've no real idea if what I have is best. I've mostly patterned it off the const-pure pass, fwiw.


r~
	* cgraph.h (struct cgraph_edge): Steal a bit from loop_nest
	for tm_atomic_call.
	* cgraphbuild.c: Revert entire file.
	* gimple-pretty-print.c (dump_gimple_call): Dump in-atomic.
	* gimple.h (GF_CALL_IN_TM_ATOMIC): New.
	(gimple_call_set_in_tm_atomic, gimple_call_in_tm_atomic_p): New.
	* print-tree.c (print_node): Dump tm-clone.
	* trans-mem.c (examine_call_tm): Set tm-atomic in the call stmt.
	(add_stmt_to_tm_region): New.
	(find_tm_clone): Remove.
	(expand_call_tm): Don't call it.  Exit for DECL_IS_TM_CLONE.
	(function_insertion_hook_holder, ipa_tm_analyze_function,
	ipa_tm_add_new_function, ipa_tm_generate_summary,
	ipa_tm_create_version, ipa_tm_decide_version, ipa_tm_execute): New.
	(pass_ipa_tm): Use full IPA_PASS.
	* tree-pass.h: Update to match.
	* tree-cfg.c (is_ctrl_altering_stmt): TM_OPS functions only
	throw (visibly) if they have an eh region number.
	* tree-inline.c (tree_versionable_function_p): Don't check for
	inlining.


--- cgraph.h	(revision 141421)
+++ cgraph.h	(local)
@@ -208,12 +208,14 @@ struct cgraph_edge GTY((chain_next ("%h.
      When set to CGRAPH_FREQ_BASE, the edge is expected to be called once
      per function call.  The range is 0 to CGRAPH_FREQ_MAX.  */
   int frequency;
+  /* Unique id of the edge.  */
+  int uid;
   /* Depth of loop nest, 1 means no loop nest.  */
-  unsigned int loop_nest : 31;
+  unsigned int loop_nest : 30;
   /* Whether this edge describes a call that was originally indirect.  */
   unsigned int indirect_call : 1;
-  /* Unique id of the edge.  */
-  int uid;
+  /* Whether this edge describes a call from within a TM_ATOMIC region.  */
+  unsigned int tm_atomic_call : 1;
 };
 
 #define CGRAPH_FREQ_BASE 1000
--- cgraphbuild.c	(revision 141421)
+++ cgraphbuild.c	(local)
@@ -122,81 +122,14 @@ compute_call_stmt_bb_frequency (basic_bl
   return freq;
 }
 
-/* Eagerly clone functions so that TM expansion can create
-   and redirect calls to a transactional clone.  */
-
-static void ATTRIBUTE_UNUSED
-prepare_tm_clone (struct cgraph_node *node)
-{
-  struct cgraph_node *tm_node;
-  tree decl, old_decl, id;
-  struct function *saved_cfun;
-
-  if (!flag_tm || flag_openmp)
-    return;
-
-  /* No need for a TM clone of the main function */
-  if (MAIN_NAME_P (DECL_NAME (node->decl)))
-    return; 
-
-  /* Do not prepare functions that are already instances 
-     of an original function decl for inlining. */
-  if (DECL_ABSTRACT_ORIGIN (node->decl) 
-      || DECL_EXTERNAL (node->decl) 
-      || DECL_VIRTUAL_P (node->decl) 
-      || DECL_ARTIFICIAL (node->decl))
-    return;
-
-  /* Clone whole function tree and set TM marker bit. */
-
-  /* Defer redirecting callers of the node to the
-     new versioned node to the tm expansion pass.  */
-  tm_node = cgraph_function_versioning (node, NULL, NULL, NULL); 
-  if (tm_node == NULL)
-    return;
-
-  decl = tm_node->decl;
-  node->next_clone = tm_node;
-  DECL_IS_TM_CLONE (decl) = 1;
-  cgraph_mark_needed_node (tm_node);
-
-  old_decl = current_function_decl;
-
-  current_function_decl = decl;
-  saved_cfun = cfun;
-  set_cfun (DECL_STRUCT_FUNCTION (decl));
-
-  /* Substitute decl name. */
-  {
-    char *tm_name;
-
-#if !defined(NO_DOT_IN_LABEL) && !defined(NO_DOLLAR_IN_LABEL)
-# define TM_SUFFIX	".$TXN"
-#elif !defined(NO_DOT_IN_LABEL)
-# define TM_SUFFIX	".TXN"
-#elif !defined(NO_DOLLAR_IN_LABEL)
-# define TM_SUFFIX	"$TXN"
-#else
-# define TM_SUFFIX	"__TXN"
-#endif
-
-    tm_name = concat (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)),
-		      TM_SUFFIX, NULL);
-    id = get_identifier (tm_name);
-    SET_DECL_ASSEMBLER_NAME (decl, id);
-    free (tm_name);
-  }
-
-  set_cfun (saved_cfun);
-  current_function_decl = old_decl;
-}
-
+/* Create cgraph edges for function calls.
+   Also look for functions and variables having addresses taken.  */
 
-/* Helper function doing the work for build_cgraph_edge */
-static void
-build_cgraph_edges_from_node (struct cgraph_node *node)
+static unsigned int
+build_cgraph_edges (void)
 {
   basic_block bb;
+  struct cgraph_node *node = cgraph_node (current_function_decl);
   struct pointer_set_t *visited_nodes = pointer_set_create ();
   gimple_stmt_iterator gsi;
   tree step;
@@ -263,20 +196,6 @@ build_cgraph_edges_from_node (struct cgr
 
   pointer_set_destroy (visited_nodes);
   initialize_inline_failed (node);
-}
-
-/* Create cgraph edges for function calls.
-   Also look for functions and variables having addresses taken.  */
-
-static unsigned int
-build_cgraph_edges (void)
-{
-  struct cgraph_node *node = cgraph_node (current_function_decl);
-
-  build_cgraph_edges_from_node (node);
-  
-  /* prepare_tm_clone (node); */
-
   return 0;
 }
 
--- gimple-pretty-print.c	(revision 141421)
+++ gimple-pretty-print.c	(local)
@@ -498,9 +498,10 @@ dump_gimple_call (pretty_printer *buffer
 
   if (gimple_call_return_slot_opt_p (gs))
     pp_string (buffer, " [return slot optimization]");
-
   if (gimple_call_tail_p (gs))
     pp_string (buffer, " [tail call]");
+  if (gimple_call_in_tm_atomic_p (gs))
+    pp_string (buffer, " [in atomic]");
 }
 
 
--- gimple.h	(revision 141421)
+++ gimple.h	(local)
@@ -104,6 +104,7 @@ enum gf_mask {
     GF_CALL_RETURN_SLOT_OPT	= 1 << 2,
     GF_CALL_TAILCALL		= 1 << 3,
     GF_CALL_VA_ARG_PACK		= 1 << 4,
+    GF_CALL_IN_TM_ATOMIC	= 1 << 5,
     GF_OMP_PARALLEL_COMBINED	= 1 << 0,
 
     /* True on an GIMPLE_OMP_RETURN statement if the return does not require
@@ -2222,6 +2223,31 @@ gimple_call_va_arg_pack_p (gimple s)
 }
 
 
+/* If IN_TM_ATOMIC_P is true, GIMPLE_CALL S is within the dynamic scope of
+   a GIMPLE_TM_ATOMIC transaction.  */
+
+static inline void
+gimple_call_set_in_tm_atomic (gimple s, bool in_tm_atomic_p)
+{
+  GIMPLE_CHECK (s, GIMPLE_CALL);
+  if (in_tm_atomic_p)
+    s->gsbase.subcode |= GF_CALL_IN_TM_ATOMIC;
+  else
+    s->gsbase.subcode &= ~GF_CALL_IN_TM_ATOMIC;
+}
+  
+
+/* Return true if GIMPLE_CALL S is within the dynamic scope of
+   a transaction.  */
+
+static inline bool
+gimple_call_in_tm_atomic_p (gimple s)
+{
+  GIMPLE_CHECK (s, GIMPLE_CALL);
+  return (s->gsbase.subcode & GF_CALL_IN_TM_ATOMIC) != 0;
+}
+
+
 /* Return true if S is a noreturn call.  */
 
 static inline bool
--- print-tree.c	(revision 141421)
+++ print-tree.c	(local)
@@ -382,6 +382,8 @@ print_node (FILE *file, const char *pref
 	fputs (" built-in", file);
       if (TREE_CODE (node) == FUNCTION_DECL && DECL_NO_STATIC_CHAIN (node))
 	fputs (" no-static-chain", file);
+      if (TREE_CODE (node) == FUNCTION_DECL && DECL_IS_TM_CLONE (node))
+	fputs (" tm-clone", file);
 
       if (TREE_CODE (node) == FIELD_DECL && DECL_PACKED (node))
 	fputs (" packed", file);
@@ -448,10 +450,10 @@ print_node (FILE *file, const char *pref
 	  fprintf (file, " %s", GET_MODE_NAME (mode));
 	}
 
-      if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)  && DECL_DEFER_OUTPUT (node))
+      if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)
+	  && DECL_DEFER_OUTPUT (node))
 	fputs (" defer-output", file);
 
-
       xloc = expand_location (DECL_SOURCE_LOCATION (node));
       fprintf (file, " file %s line %d col %d", xloc.file, xloc.line,
 	       xloc.column);
--- trans-mem.c	(revision 141421)
+++ trans-mem.c	(local)
@@ -170,6 +170,8 @@ examine_call_tm (unsigned *state, gimple
   tree fn_decl;
   unsigned flags;
 
+  gimple_call_set_in_tm_atomic (stmt, true);
+
   flags = gimple_call_flags (stmt);
   if (flags & ECF_CONST)
     return;
@@ -545,6 +547,16 @@ tm_atomic_subcode_ior (struct tm_region 
 }
 
 
+/* Add STMT to the EH region for the given TM region.  */
+
+static void
+add_stmt_to_tm_region (struct tm_region *region, gimple stmt)
+{
+  if (region->region_nr >= 0)
+    add_stmt_to_eh_region (stmt, region->region_nr);
+}
+
+
 /* Construct a call to TM_IRREVOKABLE and insert it before GSI.  */
 
 static void
@@ -555,7 +567,7 @@ expand_irrevokable (struct tm_region *re
   tm_atomic_subcode_ior (region, GTMA_HAVE_CALL_IRREVOKABLE);
 
   g = gimple_build_call (built_in_decls[BUILT_IN_TM_IRREVOKABLE], 0);
-  add_stmt_to_eh_region (g, region->region_nr);
+  add_stmt_to_tm_region (region, g);
 
   gsi_insert_before (gsi, g, GSI_SAME_STMT);
 }
@@ -739,18 +751,12 @@ expand_assign_tm (struct tm_region *regi
   else
     return;
 
-  add_stmt_to_eh_region  (gcall, region->region_nr);
+  add_stmt_to_tm_region  (region, gcall);
   mark_vops_in_stmt (stmt);
   gsi_remove (gsi, true);
 }
 
 
-static tree
-find_tm_clone (tree orig_decl ATTRIBUTE_UNUSED)
-{
-  return NULL_TREE;
-}
-
 /* Expand a call statement as appropriate for a transaction.  That is,
    either verify that the call does not affect the transaction, or
    redirect the call to a clone that handles transactions, or change
@@ -781,7 +787,7 @@ expand_call_tm (struct tm_region *region
       return false;
     }
 
-  if (DECL_IS_TM_PURE (fn_decl))
+  if (DECL_IS_TM_PURE (fn_decl) || DECL_IS_TM_CLONE (fn_decl))
     return false;
 
   if (DECL_BUILT_IN_CLASS (fn_decl) == BUILT_IN_NORMAL)
@@ -809,13 +815,6 @@ expand_call_tm (struct tm_region *region
       return false;
     }
 
-  fn_decl = find_tm_clone (fn_decl);
-  if (fn_decl)
-    {
-      gimple_call_set_fndecl (stmt, fn_decl);
-      return false;
-    }
-
   expand_irrevokable (region, gsi);
   return false;
 }
@@ -1140,20 +1139,172 @@ struct gimple_opt_pass pass_tm_memopt =
 };
 
 
+static struct cgraph_node_hook_list *function_insertion_hook_holder;
+
+static void
+ipa_tm_analyze_function (struct cgraph_node *node)
+{
+  struct cgraph_edge *e;
+
+  if (cgraph_function_body_availability (node) < AVAIL_OVERWRITABLE)
+    return;
+
+  /* If this is a transaction clone, then by definition we're already
+     inside a transaction, and thus by definition all of our callees
+     are within a transaction.  */
+  if (DECL_IS_TM_CLONE (node->decl))
+    {
+      for (e = node->callees; e ; e = e->next_callee)
+	e->tm_atomic_call = 1;
+    }
+
+  /* Otherwise, scan all blocks and transfer the IN-ATOMIC bit we set
+     on the call statement to the cgraph edge.  */
+  else
+    {
+      basic_block bb;
+
+      FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
+	{
+	  gimple_stmt_iterator gsi;
+	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	    {
+	      gimple stmt = gsi_stmt (gsi);
+	      if (is_gimple_call (stmt)
+		  && gimple_call_in_tm_atomic_p (stmt))
+		{
+		  e = cgraph_edge (node, stmt);
+		  e->tm_atomic_call = 1;
+		}
+	    }
+	}
+    }
+}
+
+static void
+ipa_tm_add_new_function (struct cgraph_node *node, void * ARG_UNUSED (data))
+{
+  ipa_tm_analyze_function (node);
+}
+
+static void
+ipa_tm_generate_summary (void)
+{
+  struct cgraph_node *node;
+
+  function_insertion_hook_holder =
+    cgraph_add_function_insertion_hook (&ipa_tm_add_new_function, NULL);
+
+  for (node = cgraph_nodes; node; node = node->next)
+    if (node->lowered)
+      ipa_tm_analyze_function (node);
+}
+
+static void
+ipa_tm_create_version (struct cgraph_node *old_node,
+		       VEC (cgraph_edge_p, heap) *redirections)
+{
+  struct cgraph_node *new_node;
+  char *tm_name;
+
+  new_node = cgraph_function_versioning (old_node, redirections,
+					 NULL, NULL);
+
+  /* ??? Versioning can fail at the discretion of the inliner.  */
+  if (new_node == NULL)
+    return;
+
+  /* The generic versioning code forces the function to be visible
+     only within this translation unit.  This isn't what we want for
+     functions the programmer marked TM_CALLABLE.  */
+  if (cgraph_is_master_clone (old_node)
+      && DECL_IS_TM_CALLABLE (old_node->decl))
+    {
+      DECL_EXTERNAL (new_node->decl) = DECL_EXTERNAL (old_node->decl);
+      TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
+      DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl);
+
+      new_node->local.externally_visible = old_node->local.externally_visible;
+      new_node->local.local = old_node->local.local;
+    }
+
+  DECL_IS_TM_CLONE (new_node->decl) = 1;
+
+  /* ??? In tree_function_versioning, we futzed with the DECL_NAME.  I'm
+     not sure why we did this, as it's surely going to destroy any hope
+     of debugging.  */
+  DECL_NAME (new_node->decl) = DECL_NAME (old_node->decl);
+
+  /* ??? The current Intel ABI for these symbols uses this first variant.
+     I believe we ought to be considering _ZGT{t,n,m} extensions to the
+     C++ name mangling ABI.  */
+#if !defined(NO_DOT_IN_LABEL) && !defined(NO_DOLLAR_IN_LABEL)
+# define TM_SUFFIX	".$TXN"
+#elif !defined(NO_DOT_IN_LABEL)
+# define TM_SUFFIX	".TXN"
+#elif !defined(NO_DOLLAR_IN_LABEL)
+# define TM_SUFFIX	"$TXN"
+#else
+# define TM_SUFFIX	"__TXN"
+#endif
+
+  tm_name = concat (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (old_node->decl)),
+		    TM_SUFFIX, NULL);
+  SET_DECL_ASSEMBLER_NAME (new_node->decl, get_identifier (tm_name));
+  free (tm_name);
+}
+
+static void
+ipa_tm_decide_version (struct cgraph_node *node)
+{
+  VEC (cgraph_edge_p, heap) *redirections = NULL;
+  cgraph_edge_p e;
+
+  /* Don't re-process transaction clones.  */
+  if (DECL_IS_TM_CLONE (node->decl))
+    return;
+
+  /* Collect a vector of all the call sites that are within transactions.  */
+  for (e = node->callers; e ; e = e->next_caller)
+    if (e->tm_atomic_call)
+      VEC_safe_push (cgraph_edge_p, heap, redirections, e);
+
+  /* Create a transaction version if the programmer has explicitly
+     requested one.  Create a transaction version if the version of
+     the function defined here is known to be used, and it has
+     transaction callers.  */
+  if ((cgraph_is_master_clone (node) && DECL_IS_TM_CALLABLE (node->decl))
+      || (cgraph_function_body_availability (node) >= AVAIL_AVAILABLE
+	  && !VEC_empty (cgraph_edge_p, redirections)))
+    {
+      ipa_tm_create_version (node, redirections);
+    }
+
+  VEC_free (cgraph_edge_p, heap, redirections);
+}
 
 static unsigned int
-execute_ipa_tm (void)
+ipa_tm_execute (void)
 {
+  struct cgraph_node *node;
+
+  cgraph_remove_function_insertion_hook (function_insertion_hook_holder);
+
+  for (node = cgraph_nodes; node; node = node->next)
+    if (node->lowered
+        && (node->needed || node->reachable))
+      ipa_tm_decide_version (node);
+
   return 0;
 }
 
-struct simple_ipa_opt_pass pass_ipa_tm =
+struct ipa_opt_pass pass_ipa_tm =
 {
  {
-  SIMPLE_IPA_PASS,
+  IPA_PASS,
   "tmipa",				/* name */
   gate_tm,				/* gate */
-  execute_ipa_tm,			/* execute */
+  ipa_tm_execute,			/* execute */
   NULL,					/* sub */
   NULL,					/* next */
   0,					/* static_pass_number */
@@ -1163,5 +1314,12 @@ struct simple_ipa_opt_pass pass_ipa_tm =
   0,					/* properties_destroyed */
   0,					/* todo_flags_start */
   0,					/* todo_flags_finish */
- }
+ },
+ ipa_tm_generate_summary,		/* generate_summary */
+ NULL,					/* write_summary */
+ NULL,					/* read_summary */
+ NULL,					/* function_read_summary */
+ 0,					/* TODOs */
+ NULL,					/* function_transform */
+ NULL,					/* variable_transform */
 };
--- tree-cfg.c	(revision 141421)
+++ tree-cfg.c	(local)
@@ -2600,10 +2600,13 @@ is_ctrl_altering_stmt (gimple t)
 	if (!(flags & (ECF_CONST | ECF_PURE)) && cfun->has_nonlocal_label)
 	  return true;
 
-	/* A call also alters control flow if it does not return.
-	   A call alters control flow if it may generate a
+	/* A call also alters control flow if it does not return.  */
+	if (flags & ECF_NORETURN)
+	  return true;
+
+	/* A call alters control flow if it may generate a
 	   transaction restart.  */
-	if (flags & (ECF_NORETURN | ECF_TM_OPS))
+	if ((flags & ECF_TM_OPS) && lookup_stmt_eh_region (t) >= 0)
 	  return true;
       }
       break;
--- tree-inline.c	(revision 141421)
+++ tree-inline.c	(local)
@@ -4184,10 +4184,6 @@ tree_versionable_function_p (tree fndecl
 {
   if (fndecl == NULL_TREE)
     return false;
-  /* ??? There are cases where a function is
-     uninlinable but can be versioned.  */
-  if (!tree_inlinable_function_p (fndecl))
-    return false;
   
   return true;
 }
--- tree-pass.h	(revision 141421)
+++ tree-pass.h	(local)
@@ -399,6 +399,7 @@ extern struct ipa_opt_pass pass_ipa_inli
 extern struct ipa_opt_pass pass_ipa_cp;
 extern struct ipa_opt_pass pass_ipa_reference;
 extern struct ipa_opt_pass pass_ipa_pure_const;
+extern struct ipa_opt_pass pass_ipa_tm;
 
 extern struct simple_ipa_opt_pass pass_ipa_matrix_reorg;
 extern struct simple_ipa_opt_pass pass_ipa_early_inline;
@@ -408,7 +409,6 @@ extern struct simple_ipa_opt_pass pass_i
 extern struct simple_ipa_opt_pass pass_early_local_passes;
 extern struct simple_ipa_opt_pass pass_ipa_increase_alignment;
 extern struct simple_ipa_opt_pass pass_ipa_function_and_variable_visibility;
-extern struct simple_ipa_opt_pass pass_ipa_tm;
 
 extern struct gimple_opt_pass pass_all_optimizations;
 extern struct gimple_opt_pass pass_cleanup_cfg_post_optimizing;

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]