This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[trans-mem] ipa pass for tm function cloning
- From: Richard Henderson <rth at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 29 Oct 2008 12:56:21 -0700
- Subject: [trans-mem] ipa pass for tm function cloning
Here's a simple pass to clone available functions called within a
transaction. The clones are re-written so that all memory accesses are
transactionalized.
I'm pretty well baffled by how complicated the IPA hooks are. What I
have seems to work, but alternately there seem to be 99 other ways I
could have achieved the same effect, and I've no real idea if what I
have is best. I've mostly patterned it off the const-pure pass, fwiw.
r~
* cgraph.h (struct cgraph_edge): Steal a bit from loop_nest
for tm_atomic_call.
* cgraphbuild.c: Revert entire file.
* gimple-pretty-print.c (dump_gimple_call): Dump in-atomic.
* gimple.h (GF_CALL_IN_TM_ATOMIC): New.
(gimple_call_set_in_tm_atomic, gimple_call_in_tm_atomic_p): New.
* print-tree.c (print_node): Dump tm-clone.
* trans-mem.c (examine_call_tm): Set tm-atomic in the call stmt.
(add_stmt_to_tm_region): New.
(find_tm_clone): Remove.
(expand_call_tm): Don't call it. Exit for DECL_IS_TM_CLONE.
(function_insertion_hook_holder, ipa_tm_analyze_function,
ipa_tm_add_new_function, ipa_tm_generate_summary,
ipa_tm_create_version, ipa_tm_decide_version, ipa_tm_execute): New.
(pass_ipa_tm): Use full IPA_PASS.
* tree-pass.h: Update to match.
* tree-cfg.c (is_ctrl_altering_stmt): TM_OPS functions only
throw (visibly) if they have an eh region number.
* tree-inline.c (tree_versionable_function_p): Don't check for
inlining.
--- cgraph.h (revision 141421)
+++ cgraph.h (local)
@@ -208,12 +208,14 @@ struct cgraph_edge GTY((chain_next ("%h.
When set to CGRAPH_FREQ_BASE, the edge is expected to be called once
per function call. The range is 0 to CGRAPH_FREQ_MAX. */
int frequency;
+ /* Unique id of the edge. */
+ int uid;
/* Depth of loop nest, 1 means no loop nest. */
- unsigned int loop_nest : 31;
+ unsigned int loop_nest : 30;
/* Whether this edge describes a call that was originally indirect. */
unsigned int indirect_call : 1;
- /* Unique id of the edge. */
- int uid;
+ /* Whether this edge describes a call from within a TM_ATOMIC region. */
+ unsigned int tm_atomic_call : 1;
};
#define CGRAPH_FREQ_BASE 1000
--- cgraphbuild.c (revision 141421)
+++ cgraphbuild.c (local)
@@ -122,81 +122,14 @@ compute_call_stmt_bb_frequency (basic_bl
return freq;
}
-/* Eagerly clone functions so that TM expansion can create
- and redirect calls to a transactional clone. */
-
-static void ATTRIBUTE_UNUSED
-prepare_tm_clone (struct cgraph_node *node)
-{
- struct cgraph_node *tm_node;
- tree decl, old_decl, id;
- struct function *saved_cfun;
-
- if (!flag_tm || flag_openmp)
- return;
-
- /* No need for a TM clone of the main function */
- if (MAIN_NAME_P (DECL_NAME (node->decl)))
- return;
-
- /* Do not prepare functions that are already instances
- of an original function decl for inlining. */
- if (DECL_ABSTRACT_ORIGIN (node->decl)
- || DECL_EXTERNAL (node->decl)
- || DECL_VIRTUAL_P (node->decl)
- || DECL_ARTIFICIAL (node->decl))
- return;
-
- /* Clone whole function tree and set TM marker bit. */
-
- /* Defer redirecting callers of the node to the
- new versioned node to the tm expansion pass. */
- tm_node = cgraph_function_versioning (node, NULL, NULL, NULL);
- if (tm_node == NULL)
- return;
-
- decl = tm_node->decl;
- node->next_clone = tm_node;
- DECL_IS_TM_CLONE (decl) = 1;
- cgraph_mark_needed_node (tm_node);
-
- old_decl = current_function_decl;
-
- current_function_decl = decl;
- saved_cfun = cfun;
- set_cfun (DECL_STRUCT_FUNCTION (decl));
-
- /* Substitute decl name. */
- {
- char *tm_name;
-
-#if !defined(NO_DOT_IN_LABEL) && !defined(NO_DOLLAR_IN_LABEL)
-# define TM_SUFFIX ".$TXN"
-#elif !defined(NO_DOT_IN_LABEL)
-# define TM_SUFFIX ".TXN"
-#elif !defined(NO_DOLLAR_IN_LABEL)
-# define TM_SUFFIX "$TXN"
-#else
-# define TM_SUFFIX "__TXN"
-#endif
-
- tm_name = concat (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)),
- TM_SUFFIX, NULL);
- id = get_identifier (tm_name);
- SET_DECL_ASSEMBLER_NAME (decl, id);
- free (tm_name);
- }
-
- set_cfun (saved_cfun);
- current_function_decl = old_decl;
-}
-
+/* Create cgraph edges for function calls.
+ Also look for functions and variables having addresses taken. */
-/* Helper function doing the work for build_cgraph_edge */
-static void
-build_cgraph_edges_from_node (struct cgraph_node *node)
+static unsigned int
+build_cgraph_edges (void)
{
basic_block bb;
+ struct cgraph_node *node = cgraph_node (current_function_decl);
struct pointer_set_t *visited_nodes = pointer_set_create ();
gimple_stmt_iterator gsi;
tree step;
@@ -263,20 +196,6 @@ build_cgraph_edges_from_node (struct cgr
pointer_set_destroy (visited_nodes);
initialize_inline_failed (node);
-}
-
-/* Create cgraph edges for function calls.
- Also look for functions and variables having addresses taken. */
-
-static unsigned int
-build_cgraph_edges (void)
-{
- struct cgraph_node *node = cgraph_node (current_function_decl);
-
- build_cgraph_edges_from_node (node);
-
- /* prepare_tm_clone (node); */
-
return 0;
}
--- gimple-pretty-print.c (revision 141421)
+++ gimple-pretty-print.c (local)
@@ -498,9 +498,10 @@ dump_gimple_call (pretty_printer *buffer
if (gimple_call_return_slot_opt_p (gs))
pp_string (buffer, " [return slot optimization]");
-
if (gimple_call_tail_p (gs))
pp_string (buffer, " [tail call]");
+ if (gimple_call_in_tm_atomic_p (gs))
+ pp_string (buffer, " [in atomic]");
}
--- gimple.h (revision 141421)
+++ gimple.h (local)
@@ -104,6 +104,7 @@ enum gf_mask {
GF_CALL_RETURN_SLOT_OPT = 1 << 2,
GF_CALL_TAILCALL = 1 << 3,
GF_CALL_VA_ARG_PACK = 1 << 4,
+ GF_CALL_IN_TM_ATOMIC = 1 << 5,
GF_OMP_PARALLEL_COMBINED = 1 << 0,
/* True on an GIMPLE_OMP_RETURN statement if the return does not require
@@ -2222,6 +2223,31 @@ gimple_call_va_arg_pack_p (gimple s)
}
+/* If IN_TM_ATOMIC_P is true, GIMPLE_CALL S is within the dynamic scope of
+ a GIMPLE_TM_ATOMIC transaction. */
+
+static inline void
+gimple_call_set_in_tm_atomic (gimple s, bool in_tm_atomic_p)
+{
+ GIMPLE_CHECK (s, GIMPLE_CALL);
+ if (in_tm_atomic_p)
+ s->gsbase.subcode |= GF_CALL_IN_TM_ATOMIC;
+ else
+ s->gsbase.subcode &= ~GF_CALL_IN_TM_ATOMIC;
+}
+
+
+/* Return true if GIMPLE_CALL S is within the dynamic scope of
+ a transaction. */
+
+static inline bool
+gimple_call_in_tm_atomic_p (gimple s)
+{
+ GIMPLE_CHECK (s, GIMPLE_CALL);
+ return (s->gsbase.subcode & GF_CALL_IN_TM_ATOMIC) != 0;
+}
+
+
/* Return true if S is a noreturn call. */
static inline bool
--- print-tree.c (revision 141421)
+++ print-tree.c (local)
@@ -382,6 +382,8 @@ print_node (FILE *file, const char *pref
fputs (" built-in", file);
if (TREE_CODE (node) == FUNCTION_DECL && DECL_NO_STATIC_CHAIN (node))
fputs (" no-static-chain", file);
+ if (TREE_CODE (node) == FUNCTION_DECL && DECL_IS_TM_CLONE (node))
+ fputs (" tm-clone", file);
if (TREE_CODE (node) == FIELD_DECL && DECL_PACKED (node))
fputs (" packed", file);
@@ -448,10 +450,10 @@ print_node (FILE *file, const char *pref
fprintf (file, " %s", GET_MODE_NAME (mode));
}
- if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS) && DECL_DEFER_OUTPUT (node))
+ if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)
+ && DECL_DEFER_OUTPUT (node))
fputs (" defer-output", file);
-
xloc = expand_location (DECL_SOURCE_LOCATION (node));
fprintf (file, " file %s line %d col %d", xloc.file, xloc.line,
xloc.column);
--- trans-mem.c (revision 141421)
+++ trans-mem.c (local)
@@ -170,6 +170,8 @@ examine_call_tm (unsigned *state, gimple
tree fn_decl;
unsigned flags;
+ gimple_call_set_in_tm_atomic (stmt, true);
+
flags = gimple_call_flags (stmt);
if (flags & ECF_CONST)
return;
@@ -545,6 +547,16 @@ tm_atomic_subcode_ior (struct tm_region
}
+/* Add STMT to the EH region for the given TM region. */
+
+static void
+add_stmt_to_tm_region (struct tm_region *region, gimple stmt)
+{
+ if (region->region_nr >= 0)
+ add_stmt_to_eh_region (stmt, region->region_nr);
+}
+
+
/* Construct a call to TM_IRREVOKABLE and insert it before GSI. */
static void
@@ -555,7 +567,7 @@ expand_irrevokable (struct tm_region *re
tm_atomic_subcode_ior (region, GTMA_HAVE_CALL_IRREVOKABLE);
g = gimple_build_call (built_in_decls[BUILT_IN_TM_IRREVOKABLE], 0);
- add_stmt_to_eh_region (g, region->region_nr);
+ add_stmt_to_tm_region (region, g);
gsi_insert_before (gsi, g, GSI_SAME_STMT);
}
@@ -739,18 +751,12 @@ expand_assign_tm (struct tm_region *regi
else
return;
- add_stmt_to_eh_region (gcall, region->region_nr);
+ add_stmt_to_tm_region (region, gcall);
mark_vops_in_stmt (stmt);
gsi_remove (gsi, true);
}
-static tree
-find_tm_clone (tree orig_decl ATTRIBUTE_UNUSED)
-{
- return NULL_TREE;
-}
-
/* Expand a call statement as appropriate for a transaction. That is,
either verify that the call does not affect the transaction, or
redirect the call to a clone that handles transactions, or change
@@ -781,7 +787,7 @@ expand_call_tm (struct tm_region *region
return false;
}
- if (DECL_IS_TM_PURE (fn_decl))
+ if (DECL_IS_TM_PURE (fn_decl) || DECL_IS_TM_CLONE (fn_decl))
return false;
if (DECL_BUILT_IN_CLASS (fn_decl) == BUILT_IN_NORMAL)
@@ -809,13 +815,6 @@ expand_call_tm (struct tm_region *region
return false;
}
- fn_decl = find_tm_clone (fn_decl);
- if (fn_decl)
- {
- gimple_call_set_fndecl (stmt, fn_decl);
- return false;
- }
-
expand_irrevokable (region, gsi);
return false;
}
@@ -1140,20 +1139,172 @@ struct gimple_opt_pass pass_tm_memopt =
};
+static struct cgraph_node_hook_list *function_insertion_hook_holder;
+
+static void
+ipa_tm_analyze_function (struct cgraph_node *node)
+{
+ struct cgraph_edge *e;
+
+ if (cgraph_function_body_availability (node) < AVAIL_OVERWRITABLE)
+ return;
+
+ /* If this is a transaction clone, then by definition we're already
+ inside a transaction, and thus by definition all of our callees
+ are within a transaction. */
+ if (DECL_IS_TM_CLONE (node->decl))
+ {
+ for (e = node->callees; e ; e = e->next_callee)
+ e->tm_atomic_call = 1;
+ }
+
+ /* Otherwise, scan all blocks and transfer the IN-ATOMIC bit we set
+ on the call statement to the cgraph edge. */
+ else
+ {
+ basic_block bb;
+
+ FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
+ {
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ if (is_gimple_call (stmt)
+ && gimple_call_in_tm_atomic_p (stmt))
+ {
+ e = cgraph_edge (node, stmt);
+ e->tm_atomic_call = 1;
+ }
+ }
+ }
+ }
+}
+
+static void
+ipa_tm_add_new_function (struct cgraph_node *node, void * ARG_UNUSED (data))
+{
+ ipa_tm_analyze_function (node);
+}
+
+static void
+ipa_tm_generate_summary (void)
+{
+ struct cgraph_node *node;
+
+ function_insertion_hook_holder =
+ cgraph_add_function_insertion_hook (&ipa_tm_add_new_function, NULL);
+
+ for (node = cgraph_nodes; node; node = node->next)
+ if (node->lowered)
+ ipa_tm_analyze_function (node);
+}
+
+static void
+ipa_tm_create_version (struct cgraph_node *old_node,
+ VEC (cgraph_edge_p, heap) *redirections)
+{
+ struct cgraph_node *new_node;
+ char *tm_name;
+
+ new_node = cgraph_function_versioning (old_node, redirections,
+ NULL, NULL);
+
+ /* ??? Versioning can fail at the discretion of the inliner. */
+ if (new_node == NULL)
+ return;
+
+ /* The generic versioning code forces the function to be visible
+ only within this translation unit. This isn't what we want for
+ functions the programmer marked TM_CALLABLE. */
+ if (cgraph_is_master_clone (old_node)
+ && DECL_IS_TM_CALLABLE (old_node->decl))
+ {
+ DECL_EXTERNAL (new_node->decl) = DECL_EXTERNAL (old_node->decl);
+ TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
+ DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl);
+
+ new_node->local.externally_visible = old_node->local.externally_visible;
+ new_node->local.local = old_node->local.local;
+ }
+
+ DECL_IS_TM_CLONE (new_node->decl) = 1;
+
+ /* ??? In tree_function_versioning, we futzed with the DECL_NAME. I'm
+ not sure why we did this, as it's surely going to destroy any hope
+ of debugging. */
+ DECL_NAME (new_node->decl) = DECL_NAME (old_node->decl);
+
+ /* ??? The current Intel ABI for these symbols uses this first variant.
+ I believe we ought to be considering _ZGT{t,n,m} extensions to the
+ C++ name mangling ABI. */
+#if !defined(NO_DOT_IN_LABEL) && !defined(NO_DOLLAR_IN_LABEL)
+# define TM_SUFFIX ".$TXN"
+#elif !defined(NO_DOT_IN_LABEL)
+# define TM_SUFFIX ".TXN"
+#elif !defined(NO_DOLLAR_IN_LABEL)
+# define TM_SUFFIX "$TXN"
+#else
+# define TM_SUFFIX "__TXN"
+#endif
+
+ tm_name = concat (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (old_node->decl)),
+ TM_SUFFIX, NULL);
+ SET_DECL_ASSEMBLER_NAME (new_node->decl, get_identifier (tm_name));
+ free (tm_name);
+}
+
+static void
+ipa_tm_decide_version (struct cgraph_node *node)
+{
+ VEC (cgraph_edge_p, heap) *redirections = NULL;
+ cgraph_edge_p e;
+
+ /* Don't re-process transaction clones. */
+ if (DECL_IS_TM_CLONE (node->decl))
+ return;
+
+ /* Collect a vector of all the call sites that are within transactions. */
+ for (e = node->callers; e ; e = e->next_caller)
+ if (e->tm_atomic_call)
+ VEC_safe_push (cgraph_edge_p, heap, redirections, e);
+
+ /* Create a transaction version if the programmer has explicitly
+ requested one. Create a transaction version if the version of
+ the function defined here is known to be used, and it has
+ transaction callers. */
+ if ((cgraph_is_master_clone (node) && DECL_IS_TM_CALLABLE (node->decl))
+ || (cgraph_function_body_availability (node) >= AVAIL_AVAILABLE
+ && !VEC_empty (cgraph_edge_p, redirections)))
+ {
+ ipa_tm_create_version (node, redirections);
+ }
+
+ VEC_free (cgraph_edge_p, heap, redirections);
+}
static unsigned int
-execute_ipa_tm (void)
+ipa_tm_execute (void)
{
+ struct cgraph_node *node;
+
+ cgraph_remove_function_insertion_hook (function_insertion_hook_holder);
+
+ for (node = cgraph_nodes; node; node = node->next)
+ if (node->lowered
+ && (node->needed || node->reachable))
+ ipa_tm_decide_version (node);
+
return 0;
}
-struct simple_ipa_opt_pass pass_ipa_tm =
+struct ipa_opt_pass pass_ipa_tm =
{
{
- SIMPLE_IPA_PASS,
+ IPA_PASS,
"tmipa", /* name */
gate_tm, /* gate */
- execute_ipa_tm, /* execute */
+ ipa_tm_execute, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
@@ -1163,5 +1314,12 @@ struct simple_ipa_opt_pass pass_ipa_tm =
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
- }
+ },
+ ipa_tm_generate_summary, /* generate_summary */
+ NULL, /* write_summary */
+ NULL, /* read_summary */
+ NULL, /* function_read_summary */
+ 0, /* TODOs */
+ NULL, /* function_transform */
+ NULL, /* variable_transform */
};
--- tree-cfg.c (revision 141421)
+++ tree-cfg.c (local)
@@ -2600,10 +2600,13 @@ is_ctrl_altering_stmt (gimple t)
if (!(flags & (ECF_CONST | ECF_PURE)) && cfun->has_nonlocal_label)
return true;
- /* A call also alters control flow if it does not return.
- A call alters control flow if it may generate a
+ /* A call also alters control flow if it does not return. */
+ if (flags & ECF_NORETURN)
+ return true;
+
+ /* A call alters control flow if it may generate a
transaction restart. */
- if (flags & (ECF_NORETURN | ECF_TM_OPS))
+ if ((flags & ECF_TM_OPS) && lookup_stmt_eh_region (t) >= 0)
return true;
}
break;
--- tree-inline.c (revision 141421)
+++ tree-inline.c (local)
@@ -4184,10 +4184,6 @@ tree_versionable_function_p (tree fndecl
{
if (fndecl == NULL_TREE)
return false;
- /* ??? There are cases where a function is
- uninlinable but can be versioned. */
- if (!tree_inlinable_function_p (fndecl))
- return false;
return true;
}
--- tree-pass.h (revision 141421)
+++ tree-pass.h (local)
@@ -399,6 +399,7 @@ extern struct ipa_opt_pass pass_ipa_inli
extern struct ipa_opt_pass pass_ipa_cp;
extern struct ipa_opt_pass pass_ipa_reference;
extern struct ipa_opt_pass pass_ipa_pure_const;
+extern struct ipa_opt_pass pass_ipa_tm;
extern struct simple_ipa_opt_pass pass_ipa_matrix_reorg;
extern struct simple_ipa_opt_pass pass_ipa_early_inline;
@@ -408,7 +409,6 @@ extern struct simple_ipa_opt_pass pass_i
extern struct simple_ipa_opt_pass pass_early_local_passes;
extern struct simple_ipa_opt_pass pass_ipa_increase_alignment;
extern struct simple_ipa_opt_pass pass_ipa_function_and_variable_visibility;
-extern struct simple_ipa_opt_pass pass_ipa_tm;
extern struct gimple_opt_pass pass_all_optimizations;
extern struct gimple_opt_pass pass_cleanup_cfg_post_optimizing;