[patch] 19/n: trans-mem: middle end/misc patches (LAST PATCH)

Richard Guenther richard.guenther@gmail.com
Fri Nov 4 11:22:00 GMT 2011


On Thu, Nov 3, 2011 at 8:32 PM, Aldy Hernandez <aldyh@redhat.com> wrote:
> This is everything else that doesn't fit neatly into any other category.
>  Here are the middle end changes, as well as pass ordering code, along with
> varasm and a potpourri of other small changes.
>
> This is the last patch.  Please let me know if there is anything else
> (reasonable) you would like me to post.
>
> Index: gcc/cgraph.h
> ===================================================================
> --- gcc/cgraph.h        (.../trunk)     (revision 180744)
> +++ gcc/cgraph.h        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -98,6 +98,9 @@ struct GTY(()) cgraph_local_info {
>   /* True when the function has been originally extern inline, but it is
>      redefined now.  */
>   unsigned redefined_extern_inline : 1;
> +
> +  /* True if the function may enter serial irrevocable mode.  */
> +  unsigned tm_may_enter_irr : 1;
>  };
>
>  /* Information about the function that needs to be computed globally
> @@ -565,6 +568,8 @@ void verify_cgraph_node (struct cgraph_n
>  void cgraph_build_static_cdtor (char which, tree body, int priority);
>  void cgraph_reset_static_var_maps (void);
>  void init_cgraph (void);
> +struct cgraph_node * cgraph_copy_node_for_versioning (struct cgraph_node *,
> +               tree, VEC(cgraph_edge_p,heap)*, bitmap);
>  struct cgraph_node *cgraph_function_versioning (struct cgraph_node *,
>                                                VEC(cgraph_edge_p,heap)*,
>                                                VEC(ipa_replace_map_p,gc)*,
> Index: gcc/tree-pass.h
> ===================================================================
> --- gcc/tree-pass.h     (.../trunk)     (revision 180744)
> +++ gcc/tree-pass.h     (.../branches/transactional-memory)     (revision
> 180773)
> @@ -447,6 +447,12 @@ extern struct gimple_opt_pass pass_build
>  extern struct gimple_opt_pass pass_local_pure_const;
>  extern struct gimple_opt_pass pass_tracer;
>  extern struct gimple_opt_pass pass_warn_unused_result;
> +extern struct gimple_opt_pass pass_diagnose_tm_blocks;
> +extern struct gimple_opt_pass pass_lower_tm;
> +extern struct gimple_opt_pass pass_tm_init;
> +extern struct gimple_opt_pass pass_tm_mark;
> +extern struct gimple_opt_pass pass_tm_memopt;
> +extern struct gimple_opt_pass pass_tm_edges;
>  extern struct gimple_opt_pass pass_split_functions;
>  extern struct gimple_opt_pass pass_feedback_split_functions;
>
> @@ -469,6 +475,7 @@ extern struct ipa_opt_pass_d pass_ipa_pu
>  extern struct simple_ipa_opt_pass pass_ipa_pta;
>  extern struct ipa_opt_pass_d pass_ipa_lto_wpa_fixup;
>  extern struct ipa_opt_pass_d pass_ipa_lto_finish_out;
> +extern struct simple_ipa_opt_pass pass_ipa_tm;
>  extern struct ipa_opt_pass_d pass_ipa_profile;
>  extern struct ipa_opt_pass_d pass_ipa_cdtor_merge;
>
> Index: gcc/rtlanal.c
> ===================================================================
> --- gcc/rtlanal.c       (.../trunk)     (revision 180744)
> +++ gcc/rtlanal.c       (.../branches/transactional-memory)     (revision
> 180773)
> @@ -1918,6 +1918,7 @@ alloc_reg_note (enum reg_note kind, rtx
>     case REG_CC_USER:
>     case REG_LABEL_TARGET:
>     case REG_LABEL_OPERAND:
> +    case REG_TM:
>       /* These types of register notes use an INSN_LIST rather than an
>         EXPR_LIST, so that copying is done right and dumps look
>         better.  */
> Index: gcc/omp-low.c
> ===================================================================
> --- gcc/omp-low.c       (.../trunk)     (revision 180744)
> +++ gcc/omp-low.c       (.../branches/transactional-memory)     (revision
> 180773)
> @@ -139,6 +139,7 @@ static tree scan_omp_1_op (tree *, int *
>     case GIMPLE_TRY: \
>     case GIMPLE_CATCH: \
>     case GIMPLE_EH_FILTER: \
> +    case GIMPLE_TRANSACTION: \
>       /* The sub-statements for these should be walked.  */ \
>       *handled_ops_p = false; \
>       break;
> Index: gcc/toplev.c
> ===================================================================
> --- gcc/toplev.c        (.../trunk)     (revision 180744)
> +++ gcc/toplev.c        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -599,6 +599,7 @@ compile_file (void)
>
>       output_shared_constant_pool ();
>       output_object_blocks ();
> +  finish_tm_clone_pairs ();
>       /* Write out any pending weak symbol declarations.  */
>       weak_finish ();
> Index: gcc/cgraphunit.c
> ===================================================================
> --- gcc/cgraphunit.c    (.../trunk)     (revision 180744)
> +++ gcc/cgraphunit.c    (.../branches/transactional-memory)     (revision
> 180773)
> @@ -2272,7 +2272,7 @@ update_call_expr (struct cgraph_node *ne
>    was copied to prevent duplications of calls that are dead
>    in the clone.  */
>
> -static struct cgraph_node *
> +struct cgraph_node *
>  cgraph_copy_node_for_versioning (struct cgraph_node *old_version,
>                                 tree new_decl,
>                                 VEC(cgraph_edge_p,heap) *redirect_callers,
> @@ -2286,7 +2286,7 @@ cgraph_copy_node_for_versioning (struct
>
>    new_version = cgraph_create_node (new_decl);
>
> -   new_version->analyzed = true;
> +   new_version->analyzed = old_version->analyzed;

Hm?  analyzed means "with body"; surely you have a body if you clone.

>    new_version->local = old_version->local;
>    new_version->local.externally_visible = false;
>    new_version->local.local = true;
> @@ -2294,6 +2294,7 @@ cgraph_copy_node_for_versioning (struct
>    new_version->rtl = old_version->rtl;
>    new_version->reachable = true;
>    new_version->count = old_version->count;
> +   new_version->lowered = true;

OTOH this isn't necessarily true.  cgraph exists before lowering.

>    for (e = old_version->callees; e; e=e->next_callee)
>      if (!bbs_to_copy
> @@ -2389,7 +2390,6 @@ cgraph_function_versioning (struct cgrap
>   DECL_VIRTUAL_P (new_version_node->decl) = 0;
>   new_version_node->local.externally_visible = 0;
>   new_version_node->local.local = 1;
> -  new_version_node->lowered = true;
>
>   /* Update the call_expr on the edges to call the new version node. */
>   update_call_expr (new_version_node);
> Index: gcc/tree-ssa-alias.c
> ===================================================================
> --- gcc/tree-ssa-alias.c        (.../trunk)     (revision 180744)
> +++ gcc/tree-ssa-alias.c        (.../branches/transactional-memory)
> (revision 180773)
> @@ -1182,6 +1182,8 @@ ref_maybe_used_by_call_p_1 (gimple call,
>        case BUILT_IN_MEMPCPY:
>        case BUILT_IN_STPCPY:
>        case BUILT_IN_STPNCPY:
> +        case BUILT_IN_TM_MEMCPY:
> +        case BUILT_IN_TM_MEMMOVE:
>          {
>            ao_ref dref;
>            tree size = NULL_TREE;
> @@ -1228,6 +1230,32 @@ ref_maybe_used_by_call_p_1 (gimple call,
>                                           size);
>            return refs_may_alias_p_1 (&dref, ref, false);
>          }
> +
> +        /* The following functions read memory pointed to by their
> +          first argument.  */
> +       CASE_BUILT_IN_TM_LOAD (1):
> +       CASE_BUILT_IN_TM_LOAD (2):
> +       CASE_BUILT_IN_TM_LOAD (4):
> +       CASE_BUILT_IN_TM_LOAD (8):
> +        CASE_BUILT_IN_TM_LOAD (FLOAT):
> +       CASE_BUILT_IN_TM_LOAD (DOUBLE):
> +       CASE_BUILT_IN_TM_LOAD (LDOUBLE):
> +       CASE_BUILT_IN_TM_LOAD (M64):
> +       CASE_BUILT_IN_TM_LOAD (M128):
> +       CASE_BUILT_IN_TM_LOAD (M256):
> +        case BUILT_IN_TM_LOG:
> +        case BUILT_IN_TM_LOG_1:
> +        case BUILT_IN_TM_LOG_2:
> +        case BUILT_IN_TM_LOG_4:
> +        case BUILT_IN_TM_LOG_8:
> +        case BUILT_IN_TM_LOG_FLOAT:
> +        case BUILT_IN_TM_LOG_DOUBLE:
> +        case BUILT_IN_TM_LOG_LDOUBLE:
> +        case BUILT_IN_TM_LOG_M64:
> +        case BUILT_IN_TM_LOG_M128:
> +        case BUILT_IN_TM_LOG_M256:
> +         return ptr_deref_may_alias_ref_p_1 (gimple_call_arg (call, 0),
> ref);
> +
>        /* These read memory pointed to by the first argument.  */
>        case BUILT_IN_STRDUP:
>        case BUILT_IN_STRNDUP:
> @@ -1250,6 +1278,7 @@ ref_maybe_used_by_call_p_1 (gimple call,
>        case BUILT_IN_STACK_SAVE:
>        case BUILT_IN_STACK_RESTORE:
>        case BUILT_IN_MEMSET:
> +        case BUILT_IN_TM_MEMSET:
>        case BUILT_IN_MEMSET_CHK:
>        case BUILT_IN_FREXP:
>        case BUILT_IN_FREXPF:
> @@ -1480,6 +1509,19 @@ call_may_clobber_ref_p_1 (gimple call, a
>        case BUILT_IN_STRCAT:
>        case BUILT_IN_STRNCAT:
>        case BUILT_IN_MEMSET:
> +        case BUILT_IN_TM_MEMSET:
> +        CASE_BUILT_IN_TM_STORE (1):
> +        CASE_BUILT_IN_TM_STORE (2):
> +        CASE_BUILT_IN_TM_STORE (4):
> +        CASE_BUILT_IN_TM_STORE (8):
> +        CASE_BUILT_IN_TM_STORE (FLOAT):
> +        CASE_BUILT_IN_TM_STORE (DOUBLE):
> +        CASE_BUILT_IN_TM_STORE (LDOUBLE):
> +        CASE_BUILT_IN_TM_STORE (M64):
> +        CASE_BUILT_IN_TM_STORE (M128):
> +        CASE_BUILT_IN_TM_STORE (M256):
> +        case BUILT_IN_TM_MEMCPY:
> +        case BUILT_IN_TM_MEMMOVE:
>          {
>            ao_ref dref;
>            tree size = NULL_TREE;
> Index: gcc/ipa-inline.c
> ===================================================================
> --- gcc/ipa-inline.c    (.../trunk)     (revision 180744)
> +++ gcc/ipa-inline.c    (.../branches/transactional-memory)     (revision
> 180773)
> @@ -284,6 +284,15 @@ can_inline_edge_p (struct cgraph_edge *e
>       e->inline_failed = CIF_EH_PERSONALITY;
>       inlinable = false;
>     }
> +  /* TM pure functions should not get inlined if the outer function is
> +     a TM safe function.  */
> +  else if (flag_tm

Please move flag checks into the respective predicates.  Any reason
why the is_tm_pure () predicate wouldn't already do the correct thing
with !flag_tm?

> +          && is_tm_pure (callee->decl)
> +          && is_tm_safe (e->caller->decl))
> +    {
> +      e->inline_failed = CIF_UNSPECIFIED;
> +      inlinable = false;
> +    }
>   /* Don't inline if the callee can throw non-call exceptions but the
>      caller cannot.
>      FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is
> missing.
> Index: gcc/crtstuff.c
> ===================================================================
> --- gcc/crtstuff.c      (.../trunk)     (revision 180744)
> +++ gcc/crtstuff.c      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -162,6 +162,9 @@ extern void __do_global_ctors_1 (void);
>  /* Likewise for _Jv_RegisterClasses.  */
>  extern void _Jv_RegisterClasses (void *) TARGET_ATTRIBUTE_WEAK;
>
> +extern void _ITM_registerTMCloneTable (void *, size_t)
> TARGET_ATTRIBUTE_WEAK;
> +extern void _ITM_deregisterTMCloneTable (void *) TARGET_ATTRIBUTE_WEAK;
> +
>  #ifdef OBJECT_FORMAT_ELF
>
>  /*  Declare a pointer to void function type.  */
> @@ -241,6 +244,11 @@ STATIC void *__JCR_LIST__[]
>   = { };
>  #endif /* JCR_SECTION_NAME */
>
> +STATIC func_ptr __TMC_LIST__[]
> +  __attribute__((unused, section(".tm_clone_table"),
> aligned(sizeof(void*))))
> +  = { };
> +extern func_ptr __TMC_END__[] __attribute__((__visibility__ ("hidden")));
> +
>  #if defined(INIT_SECTION_ASM_OP) || defined(INIT_ARRAY_SECTION_ASM_OP)
>
>  #ifdef OBJECT_FORMAT_ELF
> @@ -330,6 +338,13 @@ __do_global_dtors_aux (void)
>   }
>  #endif /* !defined(FINI_ARRAY_SECTION_ASM_OP) */
>
> +  if (_ITM_deregisterTMCloneTable)
> +    {
> +      size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
> +      if (size > 0)
> +       _ITM_deregisterTMCloneTable (__TMC_LIST__);
> +    }
> +
>  #ifdef USE_EH_FRAME_REGISTRY
>  #ifdef CRT_GET_RFIB_DATA
>   /* If we used the new __register_frame_info_bases interface,
> @@ -391,6 +406,12 @@ frame_dummy (void)
>        register_classes (__JCR_LIST__);
>     }
>  #endif /* JCR_SECTION_NAME */
> +  if (_ITM_registerTMCloneTable)
> +    {
> +      size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
> +      if (size > 0)
> +       _ITM_registerTMCloneTable (__TMC_LIST__, size);
> +    }
>  }
>
>  #ifdef INIT_SECTION_ASM_OP
> @@ -457,6 +478,13 @@ __do_global_dtors (void)
>   for (p = __DTOR_LIST__ + 1; (f = *p); p++)
>     f ();
>
> +  if (_ITM_deregisterTMCloneTable)
> +    {
> +      size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
> +      if (size > 0)
> +       _ITM_deregisterTMCloneTable (__TMC_LIST__);
> +    }
> +
>  #ifdef USE_EH_FRAME_REGISTRY
>   if (__deregister_frame_info)
>     __deregister_frame_info (__EH_FRAME_BEGIN__);
> @@ -570,6 +598,11 @@ STATIC void *__JCR_END__[1]
>    = { 0 };
>  #endif /* JCR_SECTION_NAME */
>
> +func_ptr __TMC_END__[]
> +  __attribute__((unused, section(".tm_clone_table"), aligned(sizeof(void
> *)),
> +                __visibility__ ("hidden")))
> +  = { };
> +
>  #ifdef INIT_ARRAY_SECTION_ASM_OP
>
>  /* If we are using .init_array, there is nothing to do.  */
> Index: gcc/cfgbuild.c
> ===================================================================
> --- gcc/cfgbuild.c      (.../trunk)     (revision 180744)
> +++ gcc/cfgbuild.c      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -338,18 +338,30 @@ make_edges (basic_block min, basic_block
>          /* Add any appropriate EH edges.  */
>          rtl_make_eh_edge (edge_cache, bb, insn);
>
> -         if (code == CALL_INSN && nonlocal_goto_handler_labels)
> +         if (code == CALL_INSN)
>            {
> -             /* ??? This could be made smarter: in some cases it's possible
> -                to tell that certain calls will not do a nonlocal goto.
> -                For example, if the nested functions that do the nonlocal
> -                gotos do not have their addresses taken, then only calls to
> -                those functions or to other nested functions that use them
> -                could possibly do nonlocal gotos.  */
>              if (can_nonlocal_goto (insn))
> -               for (x = nonlocal_goto_handler_labels; x; x = XEXP (x, 1))
> -                 make_label_edge (edge_cache, bb, XEXP (x, 0),
> -                                  EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
> +               {
> +                 /* ??? This could be made smarter: in some cases it's
> +                    possible to tell that certain calls will not do a
> +                    nonlocal goto.  For example, if the nested functions
> +                    that do the nonlocal gotos do not have their addresses
> +                    taken, then only calls to those functions or to other
> +                    nested functions that use them could possibly do
> +                    nonlocal gotos.  */
> +                 for (x = nonlocal_goto_handler_labels; x; x = XEXP (x, 1))
> +                   make_label_edge (edge_cache, bb, XEXP (x, 0),
> +                                    EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
> +               }
> +
> +             if (flag_tm)
> +               {
> +                 rtx note;
> +                 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
> +                   if (REG_NOTE_KIND (note) == REG_TM)
> +                     make_label_edge (edge_cache, bb, XEXP (note, 0),
> +                                      EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
> +               }
>            }
>        }
>
> Index: gcc/timevar.def
> ===================================================================
> --- gcc/timevar.def     (.../trunk)     (revision 180744)
> +++ gcc/timevar.def     (.../branches/transactional-memory)     (revision
> 180773)
> @@ -184,6 +184,7 @@ DEFTIMEVAR (TV_TREE_COPY_RENAME          , "
>  DEFTIMEVAR (TV_TREE_SSA_VERIFY       , "tree SSA verifier")
>  DEFTIMEVAR (TV_TREE_STMT_VERIFY      , "tree STMT verifier")
>  DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch initialization
> conversion")
> +DEFTIMEVAR (TV_TRANS_MEM             , "transactional memory")
>  DEFTIMEVAR (TV_TREE_STRLEN           , "tree strlen optimization")
>  DEFTIMEVAR (TV_CGRAPH_VERIFY         , "callgraph verifier")
>  DEFTIMEVAR (TV_DOM_FRONTIERS         , "dominance frontiers")
> Index: gcc/recog.c
> ===================================================================
> --- gcc/recog.c (.../trunk)     (revision 180744)
> +++ gcc/recog.c (.../branches/transactional-memory)     (revision 180773)
> @@ -3287,6 +3287,7 @@ peep2_attempt (basic_block bb, rtx insn,
>          {
>          case REG_NORETURN:
>          case REG_SETJMP:
> +         case REG_TM:
>            add_reg_note (new_insn, REG_NOTE_KIND (note),
>                          XEXP (note, 0));
>            break;
> Index: gcc/function.h
> ===================================================================
> --- gcc/function.h      (.../trunk)     (revision 180744)
> +++ gcc/function.h      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -467,6 +467,14 @@ extern GTY(()) struct rtl_data x_rtl;
>    want to do differently.  */
>  #define crtl (&x_rtl)
>
> +/* This structure is used to map a gimple statement to a label,
> +   or list of labels to represent transaction restart.  */
> +
> +struct GTY(()) tm_restart_node {
> +  gimple stmt;
> +  tree label_or_list;
> +};
> +
>  struct GTY(()) stack_usage
>  {
>   /* # of bytes of static stack space allocated by the function.  */
> @@ -518,6 +526,10 @@ struct GTY(()) function {
>   /* Value histograms attached to particular statements.  */
>   htab_t GTY((skip)) value_histograms;
>
> +  /* Map gimple stmt to tree label (or list of labels) for transaction
> +     restart and abort.  */
> +  htab_t GTY ((param_is (struct tm_restart_node))) tm_restart;
> +

As this maps 'gimple' to tree shouldn't this go to fn->gimple_df instead?
That way you avoid growing the generic struct function.  Or into eh_status,
if that looks like a better fit.

>   /* For function.c.  */
>
>   /* Points to the FUNCTION_DECL of this function.  */
> Index: gcc/emit-rtl.c
> ===================================================================
> --- gcc/emit-rtl.c      (.../trunk)     (revision 180744)
> +++ gcc/emit-rtl.c      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -3595,6 +3595,7 @@ try_split (rtx pat, rtx trial, int last)
>
>        case REG_NORETURN:
>        case REG_SETJMP:
> +       case REG_TM:
>          for (insn = insn_last; insn != NULL_RTX; insn = PREV_INSN (insn))
>            {
>              if (CALL_P (insn))
> Index: gcc/cfgexpand.c
> ===================================================================
> --- gcc/cfgexpand.c     (.../trunk)     (revision 180744)
> +++ gcc/cfgexpand.c     (.../branches/transactional-memory)     (revision
> 180773)
> @@ -2096,6 +2096,32 @@ expand_gimple_stmt (gimple stmt)
>        }
>     }
>
> +  /* Mark all calls that can have a transaction restart.  */

Why isn't this done when we expand the call?  This walking of the
RTL sequence looks like a hack (an easy one, albeit).

> +  if (cfun->tm_restart && is_gimple_call (stmt))
> +    {
> +      struct tm_restart_node dummy;
> +      void **slot;
> +
> +      dummy.stmt = stmt;
> +      slot = htab_find_slot (cfun->tm_restart, &dummy, NO_INSERT);
> +      if (slot)
> +       {
> +         struct tm_restart_node *n = (struct tm_restart_node *) *slot;
> +         tree list = n->label_or_list;
> +         rtx insn;
> +
> +         for (insn = next_real_insn (last); !CALL_P (insn);
> +              insn = next_real_insn (insn))
> +           continue;
> +
> +         if (TREE_CODE (list) == LABEL_DECL)
> +           add_reg_note (insn, REG_TM, label_rtx (list));
> +         else
> +           for (; list ; list = TREE_CHAIN (list))
> +             add_reg_note (insn, REG_TM, label_rtx (TREE_VALUE (list)));
> +       }
> +    }
> +
>   return last;
>  }
>
> @@ -4455,6 +4481,10 @@ gimple_expand_cfg (void)
>   /* After expanding, the return labels are no longer needed. */
>   return_label = NULL;
>   naked_return_label = NULL;
> +
> +  /* After expanding, the tm_restart map is no longer needed.  */
> +  cfun->tm_restart = NULL;

You should still free it, to not confuse the statistics code I think.

> +
>   /* Tag the blocks with a depth number so that change_scope can find
>      the common parent easily.  */
>   set_block_levels (DECL_INITIAL (cfun->decl), 0);
> Index: gcc/varasm.c
> ===================================================================
> --- gcc/varasm.c        (.../trunk)     (revision 180744)
> +++ gcc/varasm.c        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -5859,6 +5859,103 @@ assemble_alias (tree decl, tree target)
>     }
>  }
>
> +/* Record and output a table of translations from original function
> +   to its transaction aware clone.  Note that tm_pure functions are
> +   considered to be their own clone.  */
> +
> +static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
> +     htab_t tm_clone_pairs;
> +
> +void
> +record_tm_clone_pair (tree o, tree n)
> +{
> +  struct tree_map **slot, *h;
> +
> +  if (tm_clone_pairs == NULL)
> +    tm_clone_pairs = htab_create_ggc (32, tree_map_hash, tree_map_eq, 0);
> +
> +  h = ggc_alloc_tree_map ();
> +  h->hash = htab_hash_pointer (o);
> +  h->base.from = o;
> +  h->to = n;
> +
> +  slot = (struct tree_map **)
> +    htab_find_slot_with_hash (tm_clone_pairs, h, h->hash, INSERT);
> +  *slot = h;
> +}
> +
> +tree
> +get_tm_clone_pair (tree o)
> +{
> +  if (tm_clone_pairs)
> +    {
> +      struct tree_map *h, in;
> +
> +      in.base.from = o;
> +      in.hash = htab_hash_pointer (o);
> +      h = (struct tree_map *) htab_find_with_hash (tm_clone_pairs,
> +                                                  &in, in.hash);
> +      if (h)
> +       return h->to;
> +    }
> +  return NULL_TREE;
> +}
> +
> +/* Helper function for finish_tm_clone_pairs.  Dump the clone table.  */
> +
> +int
> +finish_tm_clone_pairs_1 (void **slot, void *info ATTRIBUTE_UNUSED)
> +{
> +  struct tree_map *map = (struct tree_map *) *slot;
> +  bool *switched = (bool *) info;
> +  tree src = map->base.from;
> +  tree dst = map->to;
> +  struct cgraph_node *src_n = cgraph_get_node (src);
> +  struct cgraph_node *dst_n = cgraph_get_node (dst);
> +
> +  /* The function ipa_tm_create_version() marks the clone as needed if
> +     the original function was needed.  But we also mark the clone as
> +     needed if we ever called the clone indirectly through
> +     TM_GETTMCLONE.  If neither of these are true, we didn't generate
> +     a clone, and we didn't call it indirectly... no sense keeping it
> +     in the clone table.  */
> +  if (!dst_n || !dst_n->needed)
> +    return 1;
> +
> +  /* This covers the case where we have optimized the original
> +     function away, and only access the transactional clone.  */
> +  if (!src_n || !src_n->needed)
> +    return 1;
> +
> +  if (!*switched)
> +    {
> +      switch_to_section (get_named_section (NULL, ".tm_clone_table", 3));
> +      assemble_align (POINTER_SIZE);
> +      *switched = true;
> +    }
> +
> +  assemble_integer (XEXP (DECL_RTL (src), 0),
> +                   POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
> +  assemble_integer (XEXP (DECL_RTL (dst), 0),
> +                   POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
> +  return 1;
> +}
> +
> +void
> +finish_tm_clone_pairs (void)
> +{
> +  bool switched = false;
> +
> +  if (tm_clone_pairs == NULL)
> +    return;
> +
> +  htab_traverse_noresize (tm_clone_pairs, finish_tm_clone_pairs_1,
> +                         (void *) &switched);

This makes the generated table dependent on memory layout.  You
need to walk the pairs in some deterministic order.  In fact why not
walk all cgraph_nodes looking for the pairs - they should still be
in the list of clones for a node and you've marked it with DECL_TM_CLONE.
You can then sort them by cgraph node uid.

Did you check bootstrapping GCC with TM enabled and address-space
randomization turned on?

> +  htab_delete (tm_clone_pairs);
> +  tm_clone_pairs = NULL;
> +}
> +
> +
>  /* Emit an assembler directive to set symbol for DECL visibility to
>    the visibility type VIS, which must not be VISIBILITY_DEFAULT.  */
>
> Index: gcc/output.h
> ===================================================================
> --- gcc/output.h        (.../trunk)     (revision 180744)
> +++ gcc/output.h        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -606,6 +606,11 @@ extern bool unlikely_text_section_p (sec
>  extern void switch_to_section (section *);
>  extern void output_section_asm_op (const void *);
>
> +extern void record_tm_clone_pair (tree, tree);
> +extern void finish_tm_clone_pairs (void);
> +extern int finish_tm_clone_pairs_1 (void **, void *);
> +extern tree get_tm_clone_pair (tree);
> +
>  extern void default_asm_output_source_filename (FILE *, const char *);
>  extern void output_file_directive (FILE *, const char *);
>
> Index: gcc/combine.c
> ===================================================================
> --- gcc/combine.c       (.../trunk)     (revision 180744)
> +++ gcc/combine.c       (.../branches/transactional-memory)     (revision
> 180773)
> @@ -13286,6 +13286,7 @@ distribute_notes (rtx notes, rtx from_in
>
>        case REG_NORETURN:
>        case REG_SETJMP:
> +       case REG_TM:
>          /* These notes must remain with the call.  It should not be
>             possible for both I2 and I3 to be a call.  */
>          if (CALL_P (i3))
> Index: gcc/tree-flow.h
> ===================================================================
> --- gcc/tree-flow.h     (.../trunk)     (revision 180744)
> +++ gcc/tree-flow.h     (.../branches/transactional-memory)     (revision
> 180773)
> @@ -778,6 +778,9 @@ extern bool maybe_duplicate_eh_stmt (gim
>  extern bool verify_eh_edges (gimple);
>  extern bool verify_eh_dispatch_edge (gimple);
>
> +/* In gtm-low.c  */
> +extern bool is_transactional_stmt (const_gimple);
> +

gimple.h please.  Looks like a gimple predicate as well, so the implementation
should be in gimple.c?

>  /* In tree-ssa-pre.c  */
>  struct pre_expr_d;
>  void add_to_value (unsigned int, struct pre_expr_d *);
> Index: gcc/tree-ssa-structalias.c
> ===================================================================
> --- gcc/tree-ssa-structalias.c  (.../trunk)     (revision 180744)
> +++ gcc/tree-ssa-structalias.c  (.../branches/transactional-memory)
> (revision 180773)
> @@ -4024,6 +4024,8 @@ find_func_aliases_for_builtin_call (gimp
>       case BUILT_IN_STPCPY_CHK:
>       case BUILT_IN_STRCAT_CHK:
>       case BUILT_IN_STRNCAT_CHK:
> +      case BUILT_IN_TM_MEMCPY:
> +      case BUILT_IN_TM_MEMMOVE:
>        {
>          tree res = gimple_call_lhs (t);
>          tree dest = gimple_call_arg (t, (DECL_FUNCTION_CODE (fndecl)
> @@ -4056,6 +4058,7 @@ find_func_aliases_for_builtin_call (gimp
>        }
>       case BUILT_IN_MEMSET:
>       case BUILT_IN_MEMSET_CHK:
> +      case BUILT_IN_TM_MEMSET:
>        {
>          tree res = gimple_call_lhs (t);
>          tree dest = gimple_call_arg (t, 0);
> @@ -4197,6 +4200,50 @@ find_func_aliases_for_builtin_call (gimp
>            }
>          return true;
>        }
> +      CASE_BUILT_IN_TM_STORE (1):
> +      CASE_BUILT_IN_TM_STORE (2):
> +      CASE_BUILT_IN_TM_STORE (4):
> +      CASE_BUILT_IN_TM_STORE (8):
> +      CASE_BUILT_IN_TM_STORE (FLOAT):
> +      CASE_BUILT_IN_TM_STORE (DOUBLE):
> +      CASE_BUILT_IN_TM_STORE (LDOUBLE):
> +      CASE_BUILT_IN_TM_STORE (M64):
> +      CASE_BUILT_IN_TM_STORE (M128):
> +      CASE_BUILT_IN_TM_STORE (M256):
> +       {
> +         tree addr = gimple_call_arg (t, 0);
> +         tree src = gimple_call_arg (t, 1);
> +
> +         get_constraint_for (addr, &lhsc);
> +         do_deref (&lhsc);
> +         get_constraint_for (src, &rhsc);
> +         process_all_all_constraints (lhsc, rhsc);
> +         VEC_free (ce_s, heap, lhsc);
> +         VEC_free (ce_s, heap, rhsc);
> +         return true;
> +       }
> +      CASE_BUILT_IN_TM_LOAD (1):
> +      CASE_BUILT_IN_TM_LOAD (2):
> +      CASE_BUILT_IN_TM_LOAD (4):
> +      CASE_BUILT_IN_TM_LOAD (8):
> +      CASE_BUILT_IN_TM_LOAD (FLOAT):
> +      CASE_BUILT_IN_TM_LOAD (DOUBLE):
> +      CASE_BUILT_IN_TM_LOAD (LDOUBLE):
> +      CASE_BUILT_IN_TM_LOAD (M64):
> +      CASE_BUILT_IN_TM_LOAD (M128):
> +      CASE_BUILT_IN_TM_LOAD (M256):
> +        {
> +         tree dest = gimple_call_lhs (t);
> +         tree addr = gimple_call_arg (t, 0);
> +
> +         get_constraint_for (dest, &lhsc);
> +         get_constraint_for (addr, &rhsc);
> +         do_deref (&rhsc);
> +         process_all_all_constraints (lhsc, rhsc);
> +         VEC_free (ce_s, heap, lhsc);
> +         VEC_free (ce_s, heap, rhsc);
> +         return true;
> +        }
>       /* Variadic argument handling needs to be handled in IPA
>         mode as well.  */
>       case BUILT_IN_VA_START:
> Index: gcc/tree-cfg.c
> ===================================================================
> --- gcc/tree-cfg.c      (.../trunk)     (revision 180744)
> +++ gcc/tree-cfg.c      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -666,6 +666,15 @@ make_edges (void)
>                }
>              break;
>
> +           case GIMPLE_TRANSACTION:
> +             {
> +               tree abort_label = gimple_transaction_label (last);
> +               if (abort_label)
> +                 make_edge (bb, label_to_block (abort_label), 0);
> +               fallthru = true;
> +             }
> +             break;
> +
>            default:
>              gcc_assert (!stmt_ends_bb_p (last));
>              fallthru = true;
> @@ -1196,22 +1205,30 @@ cleanup_dead_labels (void)
>   FOR_EACH_BB (bb)
>     {
>       gimple stmt = last_stmt (bb);
> +      tree label, new_label;
> +
>       if (!stmt)
>        continue;
>
>       switch (gimple_code (stmt))
>        {
>        case GIMPLE_COND:
> -         {
> -           tree true_label = gimple_cond_true_label (stmt);
> -           tree false_label = gimple_cond_false_label (stmt);
> +         label = gimple_cond_true_label (stmt);
> +         if (label)
> +           {
> +             new_label = main_block_label (label);
> +             if (new_label != label)
> +               gimple_cond_set_true_label (stmt, new_label);
> +           }
>
> -           if (true_label)
> -             gimple_cond_set_true_label (stmt, main_block_label
> (true_label));
> -           if (false_label)
> -             gimple_cond_set_false_label (stmt, main_block_label
> (false_label));
> -           break;
> -         }
> +         label = gimple_cond_false_label (stmt);
> +         if (label)
> +           {
> +             new_label = main_block_label (label);
> +             if (new_label != label)
> +               gimple_cond_set_false_label (stmt, new_label);
> +           }
> +         break;
>
>        case GIMPLE_SWITCH:
>          {
> @@ -1221,8 +1238,10 @@ cleanup_dead_labels (void)
>            for (i = 0; i < n; ++i)
>              {
>                tree case_label = gimple_switch_label (stmt, i);
> -               tree label = main_block_label (CASE_LABEL (case_label));
> -               CASE_LABEL (case_label) = label;
> +               label = CASE_LABEL (case_label);
> +               new_label = main_block_label (label);
> +               if (new_label != label)
> +                 CASE_LABEL (case_label) = new_label;
>              }
>            break;
>          }
> @@ -1243,13 +1262,27 @@ cleanup_dead_labels (void)
>        /* We have to handle gotos until they're removed, and we don't
>           remove them until after we've created the CFG edges.  */
>        case GIMPLE_GOTO:
> -          if (!computed_goto_p (stmt))
> +         if (!computed_goto_p (stmt))
>            {
> -             tree new_dest = main_block_label (gimple_goto_dest (stmt));
> -             gimple_goto_set_dest (stmt, new_dest);
> +             label = gimple_goto_dest (stmt);
> +             new_label = main_block_label (label);
> +             if (new_label != label)
> +               gimple_goto_set_dest (stmt, new_label);

What's the reason for these changes?  Optimization?

>            }
>          break;
>
> +       case GIMPLE_TRANSACTION:
> +         {
> +           tree label = gimple_transaction_label (stmt);
> +           if (label)
> +             {
> +               tree new_label = main_block_label (label);
> +               if (new_label != label)
> +                 gimple_transaction_set_label (stmt, new_label);
> +             }
> +         }
> +         break;
> +
>        default:
>          break;
>       }
> @@ -2263,6 +2296,13 @@ is_ctrl_altering_stmt (gimple t)
>        if (flags & ECF_NORETURN)
>          return true;
>
> +       /* TM ending statements have backedges out of the transaction.
> +          Return true so we split the basic block containing
> +          them.  */
> +       if ((flags & ECF_TM_OPS)
> +           && is_tm_ending_fndecl (gimple_call_fndecl (t)))
> +         return true;
> +
>        /* BUILT_IN_RETURN call is same as return statement.  */
>        if (gimple_call_builtin_p (t, BUILT_IN_RETURN))
>          return true;
> @@ -2284,6 +2324,10 @@ is_ctrl_altering_stmt (gimple t)
>       /* OpenMP directives alter control flow.  */
>       return true;
>
> +    case GIMPLE_TRANSACTION:
> +      /* A transaction start alters control flow.  */
> +      return true;
> +
>     default:
>       break;
>     }
> @@ -4054,6 +4098,17 @@ verify_gimple_switch (gimple stmt)
>   return false;
>  }
>
> +/* Verify the contents of a GIMPLE_TRANSACTION.  Returns true if there
> +   is a problem, otherwise false.  */
> +
> +static bool
> +verify_gimple_transaction (gimple stmt)
> +{
> +  tree lab = gimple_transaction_label (stmt);
> +  if (lab != NULL && TREE_CODE (lab) != LABEL_DECL)
> +    return true;

I seem to recall that a GIMPLE_TRANSACTION has substatements, so you should
handle it in verify_gimple_in_seq_2 and make sure to verify those substatements.

> +  return false;
> +}
>
>  /* Verify a gimple debug statement STMT.
>    Returns true if anything is wrong.  */
> @@ -4155,6 +4210,9 @@ verify_gimple_stmt (gimple stmt)
>     case GIMPLE_ASM:
>       return false;
>
> +    case GIMPLE_TRANSACTION:
> +      return verify_gimple_transaction (stmt);
> +

Not here -- as said above, a statement with substatements should be handled
in verify_gimple_in_seq_2.

>     /* Tuples that do not have tree operands.  */
>     case GIMPLE_NOP:
>     case GIMPLE_PREDICT:
> @@ -4271,10 +4329,19 @@ verify_gimple_in_seq_2 (gimple_seq stmts
>          err |= verify_gimple_in_seq_2 (gimple_eh_filter_failure (stmt));
>          break;
>
> +       case GIMPLE_EH_ELSE:
> +         err |= verify_gimple_in_seq_2 (gimple_eh_else_n_body (stmt));
> +         err |= verify_gimple_in_seq_2 (gimple_eh_else_e_body (stmt));
> +         break;
> +
>        case GIMPLE_CATCH:
>          err |= verify_gimple_in_seq_2 (gimple_catch_handler (stmt));
>          break;
>
> +       case GIMPLE_TRANSACTION:
> +         err |= verify_gimple_in_seq_2 (gimple_transaction_body (stmt));
> +         break;
> +

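Ah, you do.  But you'll never call your label verification code, because this
case breaks out before reaching the default case, which is the one that calls
verify_gimple_stmt.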

>        default:
>          {
>            bool err2 = verify_gimple_stmt (stmt);
> @@ -5052,6 +5119,14 @@ gimple_redirect_edge_and_branch (edge e,
>        redirect_eh_dispatch_edge (stmt, e, dest);
>       break;
>
> +    case GIMPLE_TRANSACTION:
> +      /* The ABORT edge has a stored label associated with it, otherwise
> +        the edges are simply redirectable.  */
> +      /* ??? We don't really need this label after the cfg is created.  */
> +      if (e->flags == 0)
> +       gimple_transaction_set_label (stmt, gimple_block_label (dest));

So why set it (and thus keep it live)?

> +      break;
> +
>     default:
>       /* Otherwise it must be a fallthru edge, and we don't need to
>         do anything besides redirecting it.  */
> @@ -6428,8 +6503,10 @@ dump_function_to_file (tree fn, FILE *fi
>   bool ignore_topmost_bind = false, any_var = false;
>   basic_block bb;
>   tree chain;
> +  bool tmclone = TREE_CODE (fn) == FUNCTION_DECL && DECL_IS_TM_CLONE (fn);
>
> -  fprintf (file, "%s (", lang_hooks.decl_printable_name (fn, 2));
> +  fprintf (file, "%s %s(", lang_hooks.decl_printable_name (fn, 2),
> +          tmclone ? "[tm-clone] " : "");
>
>   arg = DECL_ARGUMENTS (fn);
>   while (arg)
> Index: gcc/passes.c
> ===================================================================
> --- gcc/passes.c        (.../trunk)     (revision 180744)
> +++ gcc/passes.c        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -1174,9 +1174,11 @@ init_optimization_passes (void)
>   p = &all_lowering_passes;
>   NEXT_PASS (pass_warn_unused_result);
>   NEXT_PASS (pass_diagnose_omp_blocks);
> +  NEXT_PASS (pass_diagnose_tm_blocks);
>   NEXT_PASS (pass_mudflap_1);
>   NEXT_PASS (pass_lower_omp);
>   NEXT_PASS (pass_lower_cf);
> +  NEXT_PASS (pass_lower_tm);
>   NEXT_PASS (pass_refactor_eh);
>   NEXT_PASS (pass_lower_eh);
>   NEXT_PASS (pass_build_cfg);
> @@ -1241,6 +1243,7 @@ init_optimization_passes (void)
>     }
>   NEXT_PASS (pass_ipa_increase_alignment);
>   NEXT_PASS (pass_ipa_matrix_reorg);
> +  NEXT_PASS (pass_ipa_tm);
>   NEXT_PASS (pass_ipa_lower_emutls);
>   *p = NULL;
>
> @@ -1400,6 +1403,13 @@ init_optimization_passes (void)
>       NEXT_PASS (pass_uncprop);
>       NEXT_PASS (pass_local_pure_const);
>     }
> +  NEXT_PASS (pass_tm_init);
> +    {
> +      struct opt_pass **p = &pass_tm_init.pass.sub;
> +      NEXT_PASS (pass_tm_mark);
> +      NEXT_PASS (pass_tm_memopt);
> +      NEXT_PASS (pass_tm_edges);
> +    }
>   NEXT_PASS (pass_lower_complex_O0);
>   NEXT_PASS (pass_cleanup_eh);
>   NEXT_PASS (pass_lower_resx);
> Index: gcc/reg-notes.def
> ===================================================================
> --- gcc/reg-notes.def   (.../trunk)     (revision 180744)
> +++ gcc/reg-notes.def   (.../branches/transactional-memory)     (revision
> 180773)
> @@ -203,6 +203,11 @@ REG_NOTE (CROSSING_JUMP)
>    functions that can return twice.  */
>  REG_NOTE (SETJMP)
>
> +/* This kind of note is generated at each transactional memory
> +   builtin, to indicate we need to generate transaction restart
> +   edges for this insn.  */
> +REG_NOTE (TM)
> +
>  /* Indicates the cumulative offset of the stack pointer accounting
>    for pushed arguments.  This will only be generated when
>    ACCUMULATE_OUTGOING_ARGS is false.  */
> Index: gcc/cfgrtl.c
> ===================================================================
> --- gcc/cfgrtl.c        (.../trunk)     (revision 180744)
> +++ gcc/cfgrtl.c        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -2246,6 +2246,8 @@ purge_dead_edges (basic_block bb)
>            ;
>          else if ((e->flags & EDGE_EH) && can_throw_internal (insn))
>            ;
> +         else if (flag_tm && find_reg_note (insn, REG_TM, NULL))
> +           ;
>          else
>            remove = true;
>        }
> Index: gcc/params.def
> ===================================================================
> --- gcc/params.def      (.../trunk)     (revision 180744)
> +++ gcc/params.def      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -872,6 +872,13 @@ DEFPARAM (PARAM_IPA_SRA_PTR_GROWTH_FACTO
>          "a pointer to an aggregate with",
>          2, 0, 0)
>
> +DEFPARAM (PARAM_TM_MAX_AGGREGATE_SIZE,
> +         "tm-max-aggregate-size",
> +         "Size in bytes after which thread-local aggregates should be "
> +         "instrumented with the logging functions instead of save/restore "
> +         "pairs",
> +         9, 0, 0)
> +
>  DEFPARAM (PARAM_IPA_CP_VALUE_LIST_SIZE,
>          "ipa-cp-value-list-size",
>          "Maximum size of a list of values associated with each parameter
> for "
>



More information about the Gcc-patches mailing list