[patch] 19/n: trans-mem: middle end/misc patches (LAST PATCH)
Richard Guenther
richard.guenther@gmail.com
Fri Nov 4 11:22:00 GMT 2011
On Thu, Nov 3, 2011 at 8:32 PM, Aldy Hernandez <aldyh@redhat.com> wrote:
> This is everything else that doesn't fit neatly into any other category.
> Here are the middle end changes, as well as pass ordering code, along with
> varasm and a potpourri of other small changes.
>
> This is the last patch. Please let me know if there is anything else
> (reasonable) you would like me to post.
>
> Index: gcc/cgraph.h
> ===================================================================
> --- gcc/cgraph.h (.../trunk) (revision 180744)
> +++ gcc/cgraph.h (.../branches/transactional-memory) (revision
> 180773)
> @@ -98,6 +98,9 @@ struct GTY(()) cgraph_local_info {
> /* True when the function has been originally extern inline, but it is
> redefined now. */
> unsigned redefined_extern_inline : 1;
> +
> + /* True if the function may enter serial irrevocable mode. */
> + unsigned tm_may_enter_irr : 1;
> };
>
> /* Information about the function that needs to be computed globally
> @@ -565,6 +568,8 @@ void verify_cgraph_node (struct cgraph_n
> void cgraph_build_static_cdtor (char which, tree body, int priority);
> void cgraph_reset_static_var_maps (void);
> void init_cgraph (void);
> +struct cgraph_node * cgraph_copy_node_for_versioning (struct cgraph_node *,
> + tree, VEC(cgraph_edge_p,heap)*, bitmap);
> struct cgraph_node *cgraph_function_versioning (struct cgraph_node *,
> VEC(cgraph_edge_p,heap)*,
> VEC(ipa_replace_map_p,gc)*,
> Index: gcc/tree-pass.h
> ===================================================================
> --- gcc/tree-pass.h (.../trunk) (revision 180744)
> +++ gcc/tree-pass.h (.../branches/transactional-memory) (revision
> 180773)
> @@ -447,6 +447,12 @@ extern struct gimple_opt_pass pass_build
> extern struct gimple_opt_pass pass_local_pure_const;
> extern struct gimple_opt_pass pass_tracer;
> extern struct gimple_opt_pass pass_warn_unused_result;
> +extern struct gimple_opt_pass pass_diagnose_tm_blocks;
> +extern struct gimple_opt_pass pass_lower_tm;
> +extern struct gimple_opt_pass pass_tm_init;
> +extern struct gimple_opt_pass pass_tm_mark;
> +extern struct gimple_opt_pass pass_tm_memopt;
> +extern struct gimple_opt_pass pass_tm_edges;
> extern struct gimple_opt_pass pass_split_functions;
> extern struct gimple_opt_pass pass_feedback_split_functions;
>
> @@ -469,6 +475,7 @@ extern struct ipa_opt_pass_d pass_ipa_pu
> extern struct simple_ipa_opt_pass pass_ipa_pta;
> extern struct ipa_opt_pass_d pass_ipa_lto_wpa_fixup;
> extern struct ipa_opt_pass_d pass_ipa_lto_finish_out;
> +extern struct simple_ipa_opt_pass pass_ipa_tm;
> extern struct ipa_opt_pass_d pass_ipa_profile;
> extern struct ipa_opt_pass_d pass_ipa_cdtor_merge;
>
> Index: gcc/rtlanal.c
> ===================================================================
> --- gcc/rtlanal.c (.../trunk) (revision 180744)
> +++ gcc/rtlanal.c (.../branches/transactional-memory) (revision
> 180773)
> @@ -1918,6 +1918,7 @@ alloc_reg_note (enum reg_note kind, rtx
> case REG_CC_USER:
> case REG_LABEL_TARGET:
> case REG_LABEL_OPERAND:
> + case REG_TM:
> /* These types of register notes use an INSN_LIST rather than an
> EXPR_LIST, so that copying is done right and dumps look
> better. */
> Index: gcc/omp-low.c
> ===================================================================
> --- gcc/omp-low.c (.../trunk) (revision 180744)
> +++ gcc/omp-low.c (.../branches/transactional-memory) (revision
> 180773)
> @@ -139,6 +139,7 @@ static tree scan_omp_1_op (tree *, int *
> case GIMPLE_TRY: \
> case GIMPLE_CATCH: \
> case GIMPLE_EH_FILTER: \
> + case GIMPLE_TRANSACTION: \
> /* The sub-statements for these should be walked. */ \
> *handled_ops_p = false; \
> break;
> Index: gcc/toplev.c
> ===================================================================
> --- gcc/toplev.c (.../trunk) (revision 180744)
> +++ gcc/toplev.c (.../branches/transactional-memory) (revision
> 180773)
> @@ -599,6 +599,7 @@ compile_file (void)
>
> output_shared_constant_pool ();
> output_object_blocks ();
> + finish_tm_clone_pairs ();
> /* Write out any pending weak symbol declarations. */
> weak_finish ();
> Index: gcc/cgraphunit.c
> ===================================================================
> --- gcc/cgraphunit.c (.../trunk) (revision 180744)
> +++ gcc/cgraphunit.c (.../branches/transactional-memory) (revision
> 180773)
> @@ -2272,7 +2272,7 @@ update_call_expr (struct cgraph_node *ne
> was copied to prevent duplications of calls that are dead
> in the clone. */
>
> -static struct cgraph_node *
> +struct cgraph_node *
> cgraph_copy_node_for_versioning (struct cgraph_node *old_version,
> tree new_decl,
> VEC(cgraph_edge_p,heap) *redirect_callers,
> @@ -2286,7 +2286,7 @@ cgraph_copy_node_for_versioning (struct
>
> new_version = cgraph_create_node (new_decl);
>
> - new_version->analyzed = true;
> + new_version->analyzed = old_version->analyzed;
Hm? analyzed means "with body"; surely you have a body if you clone.
> new_version->local = old_version->local;
> new_version->local.externally_visible = false;
> new_version->local.local = true;
> @@ -2294,6 +2294,7 @@ cgraph_copy_node_for_versioning (struct
> new_version->rtl = old_version->rtl;
> new_version->reachable = true;
> new_version->count = old_version->count;
> + new_version->lowered = true;
OTOH this isn't necessarily true. cgraph exists before lowering.
> for (e = old_version->callees; e; e=e->next_callee)
> if (!bbs_to_copy
> @@ -2389,7 +2390,6 @@ cgraph_function_versioning (struct cgrap
> DECL_VIRTUAL_P (new_version_node->decl) = 0;
> new_version_node->local.externally_visible = 0;
> new_version_node->local.local = 1;
> - new_version_node->lowered = true;
>
> /* Update the call_expr on the edges to call the new version node. */
> update_call_expr (new_version_node);
> Index: gcc/tree-ssa-alias.c
> ===================================================================
> --- gcc/tree-ssa-alias.c (.../trunk) (revision 180744)
> +++ gcc/tree-ssa-alias.c (.../branches/transactional-memory)
> (revision 180773)
> @@ -1182,6 +1182,8 @@ ref_maybe_used_by_call_p_1 (gimple call,
> case BUILT_IN_MEMPCPY:
> case BUILT_IN_STPCPY:
> case BUILT_IN_STPNCPY:
> + case BUILT_IN_TM_MEMCPY:
> + case BUILT_IN_TM_MEMMOVE:
> {
> ao_ref dref;
> tree size = NULL_TREE;
> @@ -1228,6 +1230,32 @@ ref_maybe_used_by_call_p_1 (gimple call,
> size);
> return refs_may_alias_p_1 (&dref, ref, false);
> }
> +
> + /* The following functions read memory pointed to by their
> + first argument. */
> + CASE_BUILT_IN_TM_LOAD (1):
> + CASE_BUILT_IN_TM_LOAD (2):
> + CASE_BUILT_IN_TM_LOAD (4):
> + CASE_BUILT_IN_TM_LOAD (8):
> + CASE_BUILT_IN_TM_LOAD (FLOAT):
> + CASE_BUILT_IN_TM_LOAD (DOUBLE):
> + CASE_BUILT_IN_TM_LOAD (LDOUBLE):
> + CASE_BUILT_IN_TM_LOAD (M64):
> + CASE_BUILT_IN_TM_LOAD (M128):
> + CASE_BUILT_IN_TM_LOAD (M256):
> + case BUILT_IN_TM_LOG:
> + case BUILT_IN_TM_LOG_1:
> + case BUILT_IN_TM_LOG_2:
> + case BUILT_IN_TM_LOG_4:
> + case BUILT_IN_TM_LOG_8:
> + case BUILT_IN_TM_LOG_FLOAT:
> + case BUILT_IN_TM_LOG_DOUBLE:
> + case BUILT_IN_TM_LOG_LDOUBLE:
> + case BUILT_IN_TM_LOG_M64:
> + case BUILT_IN_TM_LOG_M128:
> + case BUILT_IN_TM_LOG_M256:
> + return ptr_deref_may_alias_ref_p_1 (gimple_call_arg (call, 0),
> ref);
> +
> /* These read memory pointed to by the first argument. */
> case BUILT_IN_STRDUP:
> case BUILT_IN_STRNDUP:
> @@ -1250,6 +1278,7 @@ ref_maybe_used_by_call_p_1 (gimple call,
> case BUILT_IN_STACK_SAVE:
> case BUILT_IN_STACK_RESTORE:
> case BUILT_IN_MEMSET:
> + case BUILT_IN_TM_MEMSET:
> case BUILT_IN_MEMSET_CHK:
> case BUILT_IN_FREXP:
> case BUILT_IN_FREXPF:
> @@ -1480,6 +1509,19 @@ call_may_clobber_ref_p_1 (gimple call, a
> case BUILT_IN_STRCAT:
> case BUILT_IN_STRNCAT:
> case BUILT_IN_MEMSET:
> + case BUILT_IN_TM_MEMSET:
> + CASE_BUILT_IN_TM_STORE (1):
> + CASE_BUILT_IN_TM_STORE (2):
> + CASE_BUILT_IN_TM_STORE (4):
> + CASE_BUILT_IN_TM_STORE (8):
> + CASE_BUILT_IN_TM_STORE (FLOAT):
> + CASE_BUILT_IN_TM_STORE (DOUBLE):
> + CASE_BUILT_IN_TM_STORE (LDOUBLE):
> + CASE_BUILT_IN_TM_STORE (M64):
> + CASE_BUILT_IN_TM_STORE (M128):
> + CASE_BUILT_IN_TM_STORE (M256):
> + case BUILT_IN_TM_MEMCPY:
> + case BUILT_IN_TM_MEMMOVE:
> {
> ao_ref dref;
> tree size = NULL_TREE;
> Index: gcc/ipa-inline.c
> ===================================================================
> --- gcc/ipa-inline.c (.../trunk) (revision 180744)
> +++ gcc/ipa-inline.c (.../branches/transactional-memory) (revision
> 180773)
> @@ -284,6 +284,15 @@ can_inline_edge_p (struct cgraph_edge *e
> e->inline_failed = CIF_EH_PERSONALITY;
> inlinable = false;
> }
> + /* TM pure functions should not get inlined if the outer function is
> + a TM safe function. */
> + else if (flag_tm
Please move flag checks into the respective predicates. Any reason
why the is_tm_pure () predicate wouldn't already do the correct thing
with !flag_tm?
> + && is_tm_pure (callee->decl)
> + && is_tm_safe (e->caller->decl))
> + {
> + e->inline_failed = CIF_UNSPECIFIED;
> + inlinable = false;
> + }
> /* Don't inline if the callee can throw non-call exceptions but the
> caller cannot.
> FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is
> missing.
> Index: gcc/crtstuff.c
> ===================================================================
> --- gcc/crtstuff.c (.../trunk) (revision 180744)
> +++ gcc/crtstuff.c (.../branches/transactional-memory) (revision
> 180773)
> @@ -162,6 +162,9 @@ extern void __do_global_ctors_1 (void);
> /* Likewise for _Jv_RegisterClasses. */
> extern void _Jv_RegisterClasses (void *) TARGET_ATTRIBUTE_WEAK;
>
> +extern void _ITM_registerTMCloneTable (void *, size_t)
> TARGET_ATTRIBUTE_WEAK;
> +extern void _ITM_deregisterTMCloneTable (void *) TARGET_ATTRIBUTE_WEAK;
> +
> #ifdef OBJECT_FORMAT_ELF
>
> /* Declare a pointer to void function type. */
> @@ -241,6 +244,11 @@ STATIC void *__JCR_LIST__[]
> = { };
> #endif /* JCR_SECTION_NAME */
>
> +STATIC func_ptr __TMC_LIST__[]
> + __attribute__((unused, section(".tm_clone_table"),
> aligned(sizeof(void*))))
> + = { };
> +extern func_ptr __TMC_END__[] __attribute__((__visibility__ ("hidden")));
> +
> #if defined(INIT_SECTION_ASM_OP) || defined(INIT_ARRAY_SECTION_ASM_OP)
>
> #ifdef OBJECT_FORMAT_ELF
> @@ -330,6 +338,13 @@ __do_global_dtors_aux (void)
> }
> #endif /* !defined(FINI_ARRAY_SECTION_ASM_OP) */
>
> + if (_ITM_deregisterTMCloneTable)
> + {
> + size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
> + if (size > 0)
> + _ITM_deregisterTMCloneTable (__TMC_LIST__);
> + }
> +
> #ifdef USE_EH_FRAME_REGISTRY
> #ifdef CRT_GET_RFIB_DATA
> /* If we used the new __register_frame_info_bases interface,
> @@ -391,6 +406,12 @@ frame_dummy (void)
> register_classes (__JCR_LIST__);
> }
> #endif /* JCR_SECTION_NAME */
> + if (_ITM_registerTMCloneTable)
> + {
> + size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
> + if (size > 0)
> + _ITM_registerTMCloneTable (__TMC_LIST__, size);
> + }
> }
>
> #ifdef INIT_SECTION_ASM_OP
> @@ -457,6 +478,13 @@ __do_global_dtors (void)
> for (p = __DTOR_LIST__ + 1; (f = *p); p++)
> f ();
>
> + if (_ITM_deregisterTMCloneTable)
> + {
> + size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
> + if (size > 0)
> + _ITM_deregisterTMCloneTable (__TMC_LIST__);
> + }
> +
> #ifdef USE_EH_FRAME_REGISTRY
> if (__deregister_frame_info)
> __deregister_frame_info (__EH_FRAME_BEGIN__);
> @@ -570,6 +598,11 @@ STATIC void *__JCR_END__[1]
> = { 0 };
> #endif /* JCR_SECTION_NAME */
>
> +func_ptr __TMC_END__[]
> + __attribute__((unused, section(".tm_clone_table"), aligned(sizeof(void
> *)),
> + __visibility__ ("hidden")))
> + = { };
> +
> #ifdef INIT_ARRAY_SECTION_ASM_OP
>
> /* If we are using .init_array, there is nothing to do. */
> Index: gcc/cfgbuild.c
> ===================================================================
> --- gcc/cfgbuild.c (.../trunk) (revision 180744)
> +++ gcc/cfgbuild.c (.../branches/transactional-memory) (revision
> 180773)
> @@ -338,18 +338,30 @@ make_edges (basic_block min, basic_block
> /* Add any appropriate EH edges. */
> rtl_make_eh_edge (edge_cache, bb, insn);
>
> - if (code == CALL_INSN && nonlocal_goto_handler_labels)
> + if (code == CALL_INSN)
> {
> - /* ??? This could be made smarter: in some cases it's possible
> - to tell that certain calls will not do a nonlocal goto.
> - For example, if the nested functions that do the nonlocal
> - gotos do not have their addresses taken, then only calls to
> - those functions or to other nested functions that use them
> - could possibly do nonlocal gotos. */
> if (can_nonlocal_goto (insn))
> - for (x = nonlocal_goto_handler_labels; x; x = XEXP (x, 1))
> - make_label_edge (edge_cache, bb, XEXP (x, 0),
> - EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
> + {
> + /* ??? This could be made smarter: in some cases it's
> + possible to tell that certain calls will not do a
> + nonlocal goto. For example, if the nested functions
> + that do the nonlocal gotos do not have their addresses
> + taken, then only calls to those functions or to other
> + nested functions that use them could possibly do
> + nonlocal gotos. */
> + for (x = nonlocal_goto_handler_labels; x; x = XEXP (x, 1))
> + make_label_edge (edge_cache, bb, XEXP (x, 0),
> + EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
> + }
> +
> + if (flag_tm)
> + {
> + rtx note;
> + for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
> + if (REG_NOTE_KIND (note) == REG_TM)
> + make_label_edge (edge_cache, bb, XEXP (note, 0),
> + EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
> + }
> }
> }
>
> Index: gcc/timevar.def
> ===================================================================
> --- gcc/timevar.def (.../trunk) (revision 180744)
> +++ gcc/timevar.def (.../branches/transactional-memory) (revision
> 180773)
> @@ -184,6 +184,7 @@ DEFTIMEVAR (TV_TREE_COPY_RENAME , "
> DEFTIMEVAR (TV_TREE_SSA_VERIFY , "tree SSA verifier")
> DEFTIMEVAR (TV_TREE_STMT_VERIFY , "tree STMT verifier")
> DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch initialization
> conversion")
> +DEFTIMEVAR (TV_TRANS_MEM , "transactional memory")
> DEFTIMEVAR (TV_TREE_STRLEN , "tree strlen optimization")
> DEFTIMEVAR (TV_CGRAPH_VERIFY , "callgraph verifier")
> DEFTIMEVAR (TV_DOM_FRONTIERS , "dominance frontiers")
> Index: gcc/recog.c
> ===================================================================
> --- gcc/recog.c (.../trunk) (revision 180744)
> +++ gcc/recog.c (.../branches/transactional-memory) (revision 180773)
> @@ -3287,6 +3287,7 @@ peep2_attempt (basic_block bb, rtx insn,
> {
> case REG_NORETURN:
> case REG_SETJMP:
> + case REG_TM:
> add_reg_note (new_insn, REG_NOTE_KIND (note),
> XEXP (note, 0));
> break;
> Index: gcc/function.h
> ===================================================================
> --- gcc/function.h (.../trunk) (revision 180744)
> +++ gcc/function.h (.../branches/transactional-memory) (revision
> 180773)
> @@ -467,6 +467,14 @@ extern GTY(()) struct rtl_data x_rtl;
> want to do differently. */
> #define crtl (&x_rtl)
>
> +/* This structure is used to map a gimple statement to a label,
> + or list of labels to represent transaction restart. */
> +
> +struct GTY(()) tm_restart_node {
> + gimple stmt;
> + tree label_or_list;
> +};
> +
> struct GTY(()) stack_usage
> {
> /* # of bytes of static stack space allocated by the function. */
> @@ -518,6 +526,10 @@ struct GTY(()) function {
> /* Value histograms attached to particular statements. */
> htab_t GTY((skip)) value_histograms;
>
> + /* Map gimple stmt to tree label (or list of labels) for transaction
> + restart and abort. */
> + htab_t GTY ((param_is (struct tm_restart_node))) tm_restart;
> +
As this maps 'gimple' to tree, shouldn't this go to fn->gimple_df instead?
That way you avoid growing the generic struct function. Or into eh_status,
if that looks like a better fit.
> /* For function.c. */
>
> /* Points to the FUNCTION_DECL of this function. */
> Index: gcc/emit-rtl.c
> ===================================================================
> --- gcc/emit-rtl.c (.../trunk) (revision 180744)
> +++ gcc/emit-rtl.c (.../branches/transactional-memory) (revision
> 180773)
> @@ -3595,6 +3595,7 @@ try_split (rtx pat, rtx trial, int last)
>
> case REG_NORETURN:
> case REG_SETJMP:
> + case REG_TM:
> for (insn = insn_last; insn != NULL_RTX; insn = PREV_INSN (insn))
> {
> if (CALL_P (insn))
> Index: gcc/cfgexpand.c
> ===================================================================
> --- gcc/cfgexpand.c (.../trunk) (revision 180744)
> +++ gcc/cfgexpand.c (.../branches/transactional-memory) (revision
> 180773)
> @@ -2096,6 +2096,32 @@ expand_gimple_stmt (gimple stmt)
> }
> }
>
> + /* Mark all calls that can have a transaction restart. */
Why isn't this done when we expand the call? This walking of the
RTL sequence looks like a hack (albeit an easy one).
> + if (cfun->tm_restart && is_gimple_call (stmt))
> + {
> + struct tm_restart_node dummy;
> + void **slot;
> +
> + dummy.stmt = stmt;
> + slot = htab_find_slot (cfun->tm_restart, &dummy, NO_INSERT);
> + if (slot)
> + {
> + struct tm_restart_node *n = (struct tm_restart_node *) *slot;
> + tree list = n->label_or_list;
> + rtx insn;
> +
> + for (insn = next_real_insn (last); !CALL_P (insn);
> + insn = next_real_insn (insn))
> + continue;
> +
> + if (TREE_CODE (list) == LABEL_DECL)
> + add_reg_note (insn, REG_TM, label_rtx (list));
> + else
> + for (; list ; list = TREE_CHAIN (list))
> + add_reg_note (insn, REG_TM, label_rtx (TREE_VALUE (list)));
> + }
> + }
> +
> return last;
> }
>
> @@ -4455,6 +4481,10 @@ gimple_expand_cfg (void)
> /* After expanding, the return labels are no longer needed. */
> return_label = NULL;
> naked_return_label = NULL;
> +
> + /* After expanding, the tm_restart map is no longer needed. */
> + cfun->tm_restart = NULL;
You should still free it, so as not to confuse the statistics code, I think.
> +
> /* Tag the blocks with a depth number so that change_scope can find
> the common parent easily. */
> set_block_levels (DECL_INITIAL (cfun->decl), 0);
> Index: gcc/varasm.c
> ===================================================================
> --- gcc/varasm.c (.../trunk) (revision 180744)
> +++ gcc/varasm.c (.../branches/transactional-memory) (revision
> 180773)
> @@ -5859,6 +5859,103 @@ assemble_alias (tree decl, tree target)
> }
> }
>
> +/* Record and output a table of translations from original function
> + to its transaction aware clone. Note that tm_pure functions are
> + considered to be their own clone. */
> +
> +static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
> + htab_t tm_clone_pairs;
> +
> +void
> +record_tm_clone_pair (tree o, tree n)
> +{
> + struct tree_map **slot, *h;
> +
> + if (tm_clone_pairs == NULL)
> + tm_clone_pairs = htab_create_ggc (32, tree_map_hash, tree_map_eq, 0);
> +
> + h = ggc_alloc_tree_map ();
> + h->hash = htab_hash_pointer (o);
> + h->base.from = o;
> + h->to = n;
> +
> + slot = (struct tree_map **)
> + htab_find_slot_with_hash (tm_clone_pairs, h, h->hash, INSERT);
> + *slot = h;
> +}
> +
> +tree
> +get_tm_clone_pair (tree o)
> +{
> + if (tm_clone_pairs)
> + {
> + struct tree_map *h, in;
> +
> + in.base.from = o;
> + in.hash = htab_hash_pointer (o);
> + h = (struct tree_map *) htab_find_with_hash (tm_clone_pairs,
> + &in, in.hash);
> + if (h)
> + return h->to;
> + }
> + return NULL_TREE;
> +}
> +
> +/* Helper function for finish_tm_clone_pairs. Dump the clone table. */
> +
> +int
> +finish_tm_clone_pairs_1 (void **slot, void *info ATTRIBUTE_UNUSED)
> +{
> + struct tree_map *map = (struct tree_map *) *slot;
> + bool *switched = (bool *) info;
> + tree src = map->base.from;
> + tree dst = map->to;
> + struct cgraph_node *src_n = cgraph_get_node (src);
> + struct cgraph_node *dst_n = cgraph_get_node (dst);
> +
> + /* The function ipa_tm_create_version() marks the clone as needed if
> + the original function was needed. But we also mark the clone as
> + needed if we ever called the clone indirectly through
> + TM_GETTMCLONE. If neither of these are true, we didn't generate
> + a clone, and we didn't call it indirectly... no sense keeping it
> + in the clone table. */
> + if (!dst_n || !dst_n->needed)
> + return 1;
> +
> + /* This covers the case where we have optimized the original
> + function away, and only access the transactional clone. */
> + if (!src_n || !src_n->needed)
> + return 1;
> +
> + if (!*switched)
> + {
> + switch_to_section (get_named_section (NULL, ".tm_clone_table", 3));
> + assemble_align (POINTER_SIZE);
> + *switched = true;
> + }
> +
> + assemble_integer (XEXP (DECL_RTL (src), 0),
> + POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
> + assemble_integer (XEXP (DECL_RTL (dst), 0),
> + POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
> + return 1;
> +}
> +
> +void
> +finish_tm_clone_pairs (void)
> +{
> + bool switched = false;
> +
> + if (tm_clone_pairs == NULL)
> + return;
> +
> + htab_traverse_noresize (tm_clone_pairs, finish_tm_clone_pairs_1,
> + (void *) &switched);
This makes the generated table dependent on memory layout. You
need to walk the pairs in some deterministic order. In fact, why not
walk all cgraph_nodes looking for the pairs - they should still be
in the list of clones for a node and you've marked them with DECL_TM_CLONE.
You can then sort them by cgraph node uid.
Did you check bootstrapping GCC with TM enabled and address-space
randomization turned on?
> + htab_delete (tm_clone_pairs);
> + tm_clone_pairs = NULL;
> +}
> +
> +
> /* Emit an assembler directive to set symbol for DECL visibility to
> the visibility type VIS, which must not be VISIBILITY_DEFAULT. */
>
> Index: gcc/output.h
> ===================================================================
> --- gcc/output.h (.../trunk) (revision 180744)
> +++ gcc/output.h (.../branches/transactional-memory) (revision
> 180773)
> @@ -606,6 +606,11 @@ extern bool unlikely_text_section_p (sec
> extern void switch_to_section (section *);
> extern void output_section_asm_op (const void *);
>
> +extern void record_tm_clone_pair (tree, tree);
> +extern void finish_tm_clone_pairs (void);
> +extern int finish_tm_clone_pairs_1 (void **, void *);
> +extern tree get_tm_clone_pair (tree);
> +
> extern void default_asm_output_source_filename (FILE *, const char *);
> extern void output_file_directive (FILE *, const char *);
>
> Index: gcc/combine.c
> ===================================================================
> --- gcc/combine.c (.../trunk) (revision 180744)
> +++ gcc/combine.c (.../branches/transactional-memory) (revision
> 180773)
> @@ -13286,6 +13286,7 @@ distribute_notes (rtx notes, rtx from_in
>
> case REG_NORETURN:
> case REG_SETJMP:
> + case REG_TM:
> /* These notes must remain with the call. It should not be
> possible for both I2 and I3 to be a call. */
> if (CALL_P (i3))
> Index: gcc/tree-flow.h
> ===================================================================
> --- gcc/tree-flow.h (.../trunk) (revision 180744)
> +++ gcc/tree-flow.h (.../branches/transactional-memory) (revision
> 180773)
> @@ -778,6 +778,9 @@ extern bool maybe_duplicate_eh_stmt (gim
> extern bool verify_eh_edges (gimple);
> extern bool verify_eh_dispatch_edge (gimple);
>
> +/* In gtm-low.c */
> +extern bool is_transactional_stmt (const_gimple);
> +
gimple.h please. It looks like a gimple predicate as well, so the implementation
should be in gimple.c?
> /* In tree-ssa-pre.c */
> struct pre_expr_d;
> void add_to_value (unsigned int, struct pre_expr_d *);
> Index: gcc/tree-ssa-structalias.c
> ===================================================================
> --- gcc/tree-ssa-structalias.c (.../trunk) (revision 180744)
> +++ gcc/tree-ssa-structalias.c (.../branches/transactional-memory)
> (revision 180773)
> @@ -4024,6 +4024,8 @@ find_func_aliases_for_builtin_call (gimp
> case BUILT_IN_STPCPY_CHK:
> case BUILT_IN_STRCAT_CHK:
> case BUILT_IN_STRNCAT_CHK:
> + case BUILT_IN_TM_MEMCPY:
> + case BUILT_IN_TM_MEMMOVE:
> {
> tree res = gimple_call_lhs (t);
> tree dest = gimple_call_arg (t, (DECL_FUNCTION_CODE (fndecl)
> @@ -4056,6 +4058,7 @@ find_func_aliases_for_builtin_call (gimp
> }
> case BUILT_IN_MEMSET:
> case BUILT_IN_MEMSET_CHK:
> + case BUILT_IN_TM_MEMSET:
> {
> tree res = gimple_call_lhs (t);
> tree dest = gimple_call_arg (t, 0);
> @@ -4197,6 +4200,50 @@ find_func_aliases_for_builtin_call (gimp
> }
> return true;
> }
> + CASE_BUILT_IN_TM_STORE (1):
> + CASE_BUILT_IN_TM_STORE (2):
> + CASE_BUILT_IN_TM_STORE (4):
> + CASE_BUILT_IN_TM_STORE (8):
> + CASE_BUILT_IN_TM_STORE (FLOAT):
> + CASE_BUILT_IN_TM_STORE (DOUBLE):
> + CASE_BUILT_IN_TM_STORE (LDOUBLE):
> + CASE_BUILT_IN_TM_STORE (M64):
> + CASE_BUILT_IN_TM_STORE (M128):
> + CASE_BUILT_IN_TM_STORE (M256):
> + {
> + tree addr = gimple_call_arg (t, 0);
> + tree src = gimple_call_arg (t, 1);
> +
> + get_constraint_for (addr, &lhsc);
> + do_deref (&lhsc);
> + get_constraint_for (src, &rhsc);
> + process_all_all_constraints (lhsc, rhsc);
> + VEC_free (ce_s, heap, lhsc);
> + VEC_free (ce_s, heap, rhsc);
> + return true;
> + }
> + CASE_BUILT_IN_TM_LOAD (1):
> + CASE_BUILT_IN_TM_LOAD (2):
> + CASE_BUILT_IN_TM_LOAD (4):
> + CASE_BUILT_IN_TM_LOAD (8):
> + CASE_BUILT_IN_TM_LOAD (FLOAT):
> + CASE_BUILT_IN_TM_LOAD (DOUBLE):
> + CASE_BUILT_IN_TM_LOAD (LDOUBLE):
> + CASE_BUILT_IN_TM_LOAD (M64):
> + CASE_BUILT_IN_TM_LOAD (M128):
> + CASE_BUILT_IN_TM_LOAD (M256):
> + {
> + tree dest = gimple_call_lhs (t);
> + tree addr = gimple_call_arg (t, 0);
> +
> + get_constraint_for (dest, &lhsc);
> + get_constraint_for (addr, &rhsc);
> + do_deref (&rhsc);
> + process_all_all_constraints (lhsc, rhsc);
> + VEC_free (ce_s, heap, lhsc);
> + VEC_free (ce_s, heap, rhsc);
> + return true;
> + }
> /* Variadic argument handling needs to be handled in IPA
> mode as well. */
> case BUILT_IN_VA_START:
> Index: gcc/tree-cfg.c
> ===================================================================
> --- gcc/tree-cfg.c (.../trunk) (revision 180744)
> +++ gcc/tree-cfg.c (.../branches/transactional-memory) (revision
> 180773)
> @@ -666,6 +666,15 @@ make_edges (void)
> }
> break;
>
> + case GIMPLE_TRANSACTION:
> + {
> + tree abort_label = gimple_transaction_label (last);
> + if (abort_label)
> + make_edge (bb, label_to_block (abort_label), 0);
> + fallthru = true;
> + }
> + break;
> +
> default:
> gcc_assert (!stmt_ends_bb_p (last));
> fallthru = true;
> @@ -1196,22 +1205,30 @@ cleanup_dead_labels (void)
> FOR_EACH_BB (bb)
> {
> gimple stmt = last_stmt (bb);
> + tree label, new_label;
> +
> if (!stmt)
> continue;
>
> switch (gimple_code (stmt))
> {
> case GIMPLE_COND:
> - {
> - tree true_label = gimple_cond_true_label (stmt);
> - tree false_label = gimple_cond_false_label (stmt);
> + label = gimple_cond_true_label (stmt);
> + if (label)
> + {
> + new_label = main_block_label (label);
> + if (new_label != label)
> + gimple_cond_set_true_label (stmt, new_label);
> + }
>
> - if (true_label)
> - gimple_cond_set_true_label (stmt, main_block_label
> (true_label));
> - if (false_label)
> - gimple_cond_set_false_label (stmt, main_block_label
> (false_label));
> - break;
> - }
> + label = gimple_cond_false_label (stmt);
> + if (label)
> + {
> + new_label = main_block_label (label);
> + if (new_label != label)
> + gimple_cond_set_false_label (stmt, new_label);
> + }
> + break;
>
> case GIMPLE_SWITCH:
> {
> @@ -1221,8 +1238,10 @@ cleanup_dead_labels (void)
> for (i = 0; i < n; ++i)
> {
> tree case_label = gimple_switch_label (stmt, i);
> - tree label = main_block_label (CASE_LABEL (case_label));
> - CASE_LABEL (case_label) = label;
> + label = CASE_LABEL (case_label);
> + new_label = main_block_label (label);
> + if (new_label != label)
> + CASE_LABEL (case_label) = new_label;
> }
> break;
> }
> @@ -1243,13 +1262,27 @@ cleanup_dead_labels (void)
> /* We have to handle gotos until they're removed, and we don't
> remove them until after we've created the CFG edges. */
> case GIMPLE_GOTO:
> - if (!computed_goto_p (stmt))
> + if (!computed_goto_p (stmt))
> {
> - tree new_dest = main_block_label (gimple_goto_dest (stmt));
> - gimple_goto_set_dest (stmt, new_dest);
> + label = gimple_goto_dest (stmt);
> + new_label = main_block_label (label);
> + if (new_label != label)
> + gimple_goto_set_dest (stmt, new_label);
What's the reason for these changes? Optimization?
> }
> break;
>
> + case GIMPLE_TRANSACTION:
> + {
> + tree label = gimple_transaction_label (stmt);
> + if (label)
> + {
> + tree new_label = main_block_label (label);
> + if (new_label != label)
> + gimple_transaction_set_label (stmt, new_label);
> + }
> + }
> + break;
> +
> default:
> break;
> }
> @@ -2263,6 +2296,13 @@ is_ctrl_altering_stmt (gimple t)
> if (flags & ECF_NORETURN)
> return true;
>
> + /* TM ending statements have backedges out of the transaction.
> + Return true so we split the basic block containing
> + them. */
> + if ((flags & ECF_TM_OPS)
> + && is_tm_ending_fndecl (gimple_call_fndecl (t)))
> + return true;
> +
> /* BUILT_IN_RETURN call is same as return statement. */
> if (gimple_call_builtin_p (t, BUILT_IN_RETURN))
> return true;
> @@ -2284,6 +2324,10 @@ is_ctrl_altering_stmt (gimple t)
> /* OpenMP directives alter control flow. */
> return true;
>
> + case GIMPLE_TRANSACTION:
> + /* A transaction start alters control flow. */
> + return true;
> +
> default:
> break;
> }
> @@ -4054,6 +4098,17 @@ verify_gimple_switch (gimple stmt)
> return false;
> }
>
> +/* Verify the contents of a GIMPLE_TRANSACTION. Returns true if there
> + is a problem, otherwise false. */
> +
> +static bool
> +verify_gimple_transaction (gimple stmt)
> +{
> + tree lab = gimple_transaction_label (stmt);
> + if (lab != NULL && TREE_CODE (lab) != LABEL_DECL)
> + return true;
ISTR this has substatements, so you should handle this in
verify_gimple_in_seq_2 and make sure to verify those substatements.
> + return false;
> +}
>
> /* Verify a gimple debug statement STMT.
> Returns true if anything is wrong. */
> @@ -4155,6 +4210,9 @@ verify_gimple_stmt (gimple stmt)
> case GIMPLE_ASM:
> return false;
>
> + case GIMPLE_TRANSACTION:
> + return verify_gimple_transaction (stmt);
> +
Not here.
> /* Tuples that do not have tree operands. */
> case GIMPLE_NOP:
> case GIMPLE_PREDICT:
> @@ -4271,10 +4329,19 @@ verify_gimple_in_seq_2 (gimple_seq stmts
> err |= verify_gimple_in_seq_2 (gimple_eh_filter_failure (stmt));
> break;
>
> + case GIMPLE_EH_ELSE:
> + err |= verify_gimple_in_seq_2 (gimple_eh_else_n_body (stmt));
> + err |= verify_gimple_in_seq_2 (gimple_eh_else_e_body (stmt));
> + break;
> +
> case GIMPLE_CATCH:
> err |= verify_gimple_in_seq_2 (gimple_catch_handler (stmt));
> break;
>
> + case GIMPLE_TRANSACTION:
> + err |= verify_gimple_in_seq_2 (gimple_transaction_body (stmt));
> + break;
> +
Ah, you do. But you'll never call your label verification code.
> default:
> {
> bool err2 = verify_gimple_stmt (stmt);
> @@ -5052,6 +5119,14 @@ gimple_redirect_edge_and_branch (edge e,
> redirect_eh_dispatch_edge (stmt, e, dest);
> break;
>
> + case GIMPLE_TRANSACTION:
> + /* The ABORT edge has a stored label associated with it, otherwise
> + the edges are simply redirectable. */
> + /* ??? We don't really need this label after the cfg is created. */
> + if (e->flags == 0)
> + gimple_transaction_set_label (stmt, gimple_block_label (dest));
So why set it (and thus keep it live)?
> + break;
> +
> default:
> /* Otherwise it must be a fallthru edge, and we don't need to
> do anything besides redirecting it. */
> @@ -6428,8 +6503,10 @@ dump_function_to_file (tree fn, FILE *fi
> bool ignore_topmost_bind = false, any_var = false;
> basic_block bb;
> tree chain;
> + bool tmclone = TREE_CODE (fn) == FUNCTION_DECL && DECL_IS_TM_CLONE (fn);
>
> - fprintf (file, "%s (", lang_hooks.decl_printable_name (fn, 2));
> + fprintf (file, "%s %s(", lang_hooks.decl_printable_name (fn, 2),
> + tmclone ? "[tm-clone] " : "");
>
> arg = DECL_ARGUMENTS (fn);
> while (arg)
> Index: gcc/passes.c
> ===================================================================
> --- gcc/passes.c (.../trunk) (revision 180744)
> +++ gcc/passes.c (.../branches/transactional-memory) (revision
> 180773)
> @@ -1174,9 +1174,11 @@ init_optimization_passes (void)
> p = &all_lowering_passes;
> NEXT_PASS (pass_warn_unused_result);
> NEXT_PASS (pass_diagnose_omp_blocks);
> + NEXT_PASS (pass_diagnose_tm_blocks);
> NEXT_PASS (pass_mudflap_1);
> NEXT_PASS (pass_lower_omp);
> NEXT_PASS (pass_lower_cf);
> + NEXT_PASS (pass_lower_tm);
> NEXT_PASS (pass_refactor_eh);
> NEXT_PASS (pass_lower_eh);
> NEXT_PASS (pass_build_cfg);
> @@ -1241,6 +1243,7 @@ init_optimization_passes (void)
> }
> NEXT_PASS (pass_ipa_increase_alignment);
> NEXT_PASS (pass_ipa_matrix_reorg);
> + NEXT_PASS (pass_ipa_tm);
> NEXT_PASS (pass_ipa_lower_emutls);
> *p = NULL;
>
> @@ -1400,6 +1403,13 @@ init_optimization_passes (void)
> NEXT_PASS (pass_uncprop);
> NEXT_PASS (pass_local_pure_const);
> }
> + NEXT_PASS (pass_tm_init);
> + {
> + struct opt_pass **p = &pass_tm_init.pass.sub;
> + NEXT_PASS (pass_tm_mark);
> + NEXT_PASS (pass_tm_memopt);
> + NEXT_PASS (pass_tm_edges);
> + }
> NEXT_PASS (pass_lower_complex_O0);
> NEXT_PASS (pass_cleanup_eh);
> NEXT_PASS (pass_lower_resx);
> Index: gcc/reg-notes.def
> ===================================================================
> --- gcc/reg-notes.def (.../trunk) (revision 180744)
> +++ gcc/reg-notes.def (.../branches/transactional-memory) (revision
> 180773)
> @@ -203,6 +203,11 @@ REG_NOTE (CROSSING_JUMP)
> functions that can return twice. */
> REG_NOTE (SETJMP)
>
> +/* This kind of note is generated at each transactional memory
> + builtin, to indicate we need to generate transaction restart
> + edges for this insn. */
> +REG_NOTE (TM)
> +
> /* Indicates the cumulative offset of the stack pointer accounting
> for pushed arguments. This will only be generated when
> ACCUMULATE_OUTGOING_ARGS is false. */
> Index: gcc/cfgrtl.c
> ===================================================================
> --- gcc/cfgrtl.c (.../trunk) (revision 180744)
> +++ gcc/cfgrtl.c (.../branches/transactional-memory) (revision
> 180773)
> @@ -2246,6 +2246,8 @@ purge_dead_edges (basic_block bb)
> ;
> else if ((e->flags & EDGE_EH) && can_throw_internal (insn))
> ;
> + else if (flag_tm && find_reg_note (insn, REG_TM, NULL))
> + ;
> else
> remove = true;
> }
> Index: gcc/params.def
> ===================================================================
> --- gcc/params.def (.../trunk) (revision 180744)
> +++ gcc/params.def (.../branches/transactional-memory) (revision
> 180773)
> @@ -872,6 +872,13 @@ DEFPARAM (PARAM_IPA_SRA_PTR_GROWTH_FACTO
> "a pointer to an aggregate with",
> 2, 0, 0)
>
> +DEFPARAM (PARAM_TM_MAX_AGGREGATE_SIZE,
> + "tm-max-aggregate-size",
> + "Size in bytes after which thread-local aggregates should be "
> + "instrumented with the logging functions instead of save/restore "
> + "pairs",
> + 9, 0, 0)
> +
> DEFPARAM (PARAM_IPA_CP_VALUE_LIST_SIZE,
> "ipa-cp-value-list-size",
> "Maximum size of a list of values associated with each parameter
> for "
>
More information about the Gcc-patches
mailing list