This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Fix always_inline


On Wed, May 28, 2008 at 9:15 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
>> On Mon, May 19, 2008 at 9:36 PM, Jan Hubicka <jh@suse.cz> wrote:
>> > > ISTR that Honza has it working to some extent already.
>> >
>> >  I am attaching WIP patch (it went quite smoothly).  It breaks complex,
>> >  mudflap and other stuff, but it is good enough to bootstrap so we can
>> >  get some idea how things works.  Rearranging the mudflap/complex passes
>> >  should be easy enough later.
>> >
>> >  Compiling combine.c with SSA path slows down compiler from 1.517s to
>> >  1.636 and increase size of produced assembly somewhat (695k->697k).  7%
>> >  slowdown for combine.c (5% for Gerald's testcase) is something we need
>> >  to consider, but it is not _that_ disasterous.  I didn't experimented
>> >  with limited DCE or CCP yet.
>> >  Probably if every statement writting to user variable is marked as
>> >  having side effect, our DCE should be safe.
>>
>> The figures are without enabling TER, I would expect both size and
>> time savings from enabling it.
>
> Hi,
> here is variant of patch that enables limited amount of TER (basically
> we can't mix statements from various locations or we confuse GDB) and
> gets clean run on GDB testsuite.
>
> I am still testing it, but compile time and code size now actually
> improves for combine.c and Gerald's testcase.  We eat more memory as
> expected, but the extra ggc runs seems to pay back in performance, so we
> probably lose noticeably only in case of testcases having single big
> function where SSA form is expensive....
>
> tree-ssa-coalesce change (to make all names comming from single user var
> coalesced) is a hack.  It can be made linear with a hashtable, but
> perhaps the info is somehow readilly available just I don't know about
> it.
>
> Honza
>
> Index: cgraph.c
> ===================================================================
> --- cgraph.c    (revision 136074)
> +++ cgraph.c    (working copy)
> @@ -1070,7 +1070,7 @@
>        if (!lowered)
>           tree_lowering_passes (fndecl);
>        bitmap_obstack_initialize (NULL);
> -       if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (fndecl)) && optimize)
> +       if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (fndecl)))
>          execute_pass_list (pass_early_local_passes.pass.sub);
>        bitmap_obstack_release (NULL);
>        tree_rest_of_compilation (fndecl);
> Index: tree-ssa-coalesce.c
> ===================================================================
> --- tree-ssa-coalesce.c (revision 136074)
> +++ tree-ssa-coalesce.c (working copy)
> @@ -1309,13 +1309,39 @@
>   coalesce_list_p cl;
>   bitmap used_in_copies = BITMAP_ALLOC (NULL);
>   var_map map;
> +  int i, j;
>
>   cl = create_coalesce_list ();
>   map = create_outofssa_var_map (cl, used_in_copies);
>
> +  /* FIXME: We need to coalesce all names originating same SSA_NAME_VAR.
> +     I am not sure what is proper implementation here. */

At least the loop nest can be somewhat optimized by moving the ssa_name
reference of i out of the innermost loop, as well as the test for
DECL_ARTIFICIAL of a.
The asserts look unneccessary.  The extra dumps should also be removed again
IMHO.  For the constant 10000 there is a #define, MUST_COALESCE_COST.

Otherwise this looks ok, but please re-post the patch with the above changes
under a new, more attention-raising topic and wait a bit for feedback ;)

Thanks,
Richard.

> +  if (!optimize)
> +    for (i = 1; i < num_ssa_names; i++)
> +      for (j = 1; j < i; j++)
> +         {
> +           tree a = ssa_name (i);
> +           tree b = ssa_name (j);
> +           if (a && b && SSA_NAME_VAR (a) == SSA_NAME_VAR (b)
> +               && !DECL_ARTIFICIAL (SSA_NAME_VAR (a)))
> +             {
> +               add_coalesce (cl, i, j, 10000);
> +               gcc_assert (SSA_NAME_VERSION (a) == i);
> +               gcc_assert (SSA_NAME_VERSION (b) == j);
> +               bitmap_set_bit (used_in_copies, SSA_NAME_VERSION (a));
> +               bitmap_set_bit (used_in_copies, SSA_NAME_VERSION (b));
> +             }
> +         }
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    dump_var_map (dump_file, map);
> +
>   /* Don't calculate live ranges for variables not in the coalesce list.  */
>   partition_view_bitmap (map, used_in_copies, true);
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    dump_var_map (dump_file, map);
>   BITMAP_FREE (used_in_copies);
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +    dump_var_map (dump_file, map);
>
>   if (num_var_partitions (map) < 1)
>     {
> Index: tree-ssa-live.c
> ===================================================================
> --- tree-ssa-live.c     (revision 136074)
> +++ tree-ssa-live.c     (working copy)
> @@ -660,7 +660,8 @@
>
>          if (TREE_CODE (var) == VAR_DECL
>              && is_global_var (var)
> -             && bitmap_bit_p (global_unused_vars, DECL_UID (var)))
> +             && bitmap_bit_p (global_unused_vars, DECL_UID (var))
> +             && (optimize || DECL_ARTIFICIAL (var)))
>            *cell = TREE_CHAIN (*cell);
>          else
>            cell = &TREE_CHAIN (*cell);
> @@ -680,9 +681,11 @@
>        && TREE_CODE (t) != RESULT_DECL
>        && !(ann = var_ann (t))->used
>        && !ann->symbol_mem_tag
> -       && !TREE_ADDRESSABLE (t))
> +       && !TREE_ADDRESSABLE (t)
> +       && (optimize || DECL_ARTIFICIAL (t)))
>       remove_referenced_var (t);
> -  remove_unused_scope_block_p (DECL_INITIAL (current_function_decl));
> +  if (optimize)
> +    remove_unused_scope_block_p (DECL_INITIAL (current_function_decl));
>  }
>
>
> Index: common.opt
> ===================================================================
> --- common.opt  (revision 136074)
> +++ common.opt  (working copy)
> @@ -1149,7 +1149,7 @@
>  Perform scalar replacement of aggregates
>
>  ftree-ter
> -Common Report Var(flag_tree_ter) Optimization
> +Common Report Var(flag_tree_ter) Init(1) Optimization
>  Replace temporary expressions in the SSA->normal pass
>
>  ftree-lrs
> Index: tree-ssa-ter.c
> ===================================================================
> --- tree-ssa-ter.c      (revision 136074)
> +++ tree-ssa-ter.c      (working copy)
> @@ -29,6 +29,7 @@
>  #include "tree-flow.h"
>  #include "tree-dump.h"
>  #include "tree-ssa-live.h"
> +#include "flags.h"
>
>
>  /* Temporary Expression Replacement (TER)
> @@ -363,6 +364,8 @@
>   tree call_expr;
>   use_operand_p use_p;
>   tree def, use_stmt;
> +  location_t locus1, locus2;
> +  tree block1, block2;
>
>   /* Only consider modify stmts.  */
>   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
> @@ -385,6 +388,36 @@
>   if (bb_for_stmt (use_stmt) != bb_for_stmt (stmt))
>     return false;
>
> +  if (GIMPLE_STMT_P (stmt))
> +    {
> +      locus1 = GIMPLE_STMT_LOCUS (stmt);
> +      block1 = GIMPLE_STMT_BLOCK (stmt);
> +    }
> +  else
> +    {
> +      locus1 = EXPR_LOCUS (stmt);
> +      block1 = TREE_BLOCK (stmt);
> +    }
> +  if (GIMPLE_STMT_P (use_stmt))
> +    {
> +      locus2 = GIMPLE_STMT_LOCUS (use_stmt);
> +      block2 = GIMPLE_STMT_BLOCK (use_stmt);
> +    }
> +  if (TREE_CODE (use_stmt) == PHI_NODE)
> +    {
> +      locus2 = NULL;
> +      block2 = NULL_TREE;
> +    }
> +  else
> +    {
> +      locus2 = EXPR_LOCUS (use_stmt);
> +      block2 = TREE_BLOCK (use_stmt);
> +    }
> +
> +  if (!optimize
> +      && ((locus1 && locus1 != locus2) || (block1 && block1 != block2)))
> +    return false;
> +
>   /* Used in this block, but at the TOP of the block, not the end.  */
>   if (TREE_CODE (use_stmt) == PHI_NODE)
>     return false;
> @@ -411,7 +444,6 @@
>   /* Leave any stmt with volatile operands alone as well.  */
>   if (stmt_ann (stmt)->has_volatile_ops)
>     return false;
> -
>
>   return true;
>  }
> Index: tree-optimize.c
> ===================================================================
> --- tree-optimize.c     (revision 136074)
> +++ tree-optimize.c     (working copy)
> @@ -337,20 +337,12 @@
>   return 0;
>  }
>
> -/* Gate: initialize or not the SSA datastructures.  */
> -
> -static bool
> -gate_init_datastructures (void)
> -{
> -  return (optimize >= 1);
> -}
> -
>  struct gimple_opt_pass pass_init_datastructures =
>  {
>  {
>   GIMPLE_PASS,
>   NULL,                                        /* name */
> -  gate_init_datastructures,            /* gate */
> +  NULL,                                        /* gate */
>   execute_init_datastructures,         /* execute */
>   NULL,                                        /* sub */
>   NULL,                                        /* next */
> Index: tree-outof-ssa.c
> ===================================================================
> --- tree-outof-ssa.c    (revision 136074)
> +++ tree-outof-ssa.c    (working copy)
> @@ -1477,7 +1477,7 @@
>   NULL,                                        /* next */
>   0,                                   /* static_pass_number */
>   TV_TREE_SSA_TO_NORMAL,               /* tv_id */
> -  PROP_cfg | PROP_ssa | PROP_alias,    /* properties_required */
> +  PROP_cfg | PROP_ssa,                 /* properties_required */
>   0,                                   /* properties_provided */
>   /* ??? If TER is enabled, we also kill gimple.  */
>   PROP_ssa,                            /* properties_destroyed */
> Index: passes.c
> ===================================================================
> --- passes.c    (revision 136074)
> +++ passes.c    (working copy)
> @@ -542,12 +542,13 @@
>       NEXT_PASS (pass_cleanup_cfg);
>       NEXT_PASS (pass_init_datastructures);
>       NEXT_PASS (pass_expand_omp);
> +
> +      NEXT_PASS (pass_referenced_vars);
> +      NEXT_PASS (pass_reset_cc_flags);
> +      NEXT_PASS (pass_build_ssa);
>       NEXT_PASS (pass_all_early_optimizations);
>        {
>          struct opt_pass **p = &pass_all_early_optimizations.pass.sub;
> -         NEXT_PASS (pass_referenced_vars);
> -         NEXT_PASS (pass_reset_cc_flags);
> -         NEXT_PASS (pass_build_ssa);
>          NEXT_PASS (pass_early_warn_uninitialized);
>          NEXT_PASS (pass_rebuild_cgraph_edges);
>          NEXT_PASS (pass_early_inline);
> @@ -565,8 +566,8 @@
>          NEXT_PASS (pass_simple_dse);
>          NEXT_PASS (pass_tail_recursion);
>           NEXT_PASS (pass_profile);
> -         NEXT_PASS (pass_release_ssa_names);
>        }
> +      NEXT_PASS (pass_release_ssa_names);
>       NEXT_PASS (pass_rebuild_cgraph_edges);
>     }
>   NEXT_PASS (pass_ipa_increase_alignment);
> @@ -703,11 +704,12 @@
>       NEXT_PASS (pass_tail_calls);
>       NEXT_PASS (pass_rename_ssa_copies);
>       NEXT_PASS (pass_uncprop);
> -      NEXT_PASS (pass_del_ssa);
> -      NEXT_PASS (pass_nrv);
> -      NEXT_PASS (pass_mark_used_blocks);
> -      NEXT_PASS (pass_cleanup_cfg_post_optimizing);
>     }
> +  NEXT_PASS (pass_del_ssa);
> +  NEXT_PASS (pass_nrv);
> +  NEXT_PASS (pass_mark_used_blocks);
> +  NEXT_PASS (pass_cleanup_cfg_post_optimizing);
> +
>   NEXT_PASS (pass_warn_function_noreturn);
>   NEXT_PASS (pass_free_datastructures);
>   NEXT_PASS (pass_mudflap_2);
> @@ -931,7 +933,10 @@
>         if the parent pass had not scheduled an SSA update, we may
>         still need to do one.  */
>       if (!(flags & TODO_update_ssa_any) && need_ssa_update_p ())
> -       flags |= TODO_update_ssa;
> +        {
> +         flags |= TODO_update_ssa;
> +         gcc_assert (cfun->curr_properties & PROP_ssa);
> +       }
>     }
>
>   if (flags & TODO_update_ssa_any)
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]