This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][gimple-interchange] Final cleanup stuff


On Tue, Dec 5, 2017 at 1:02 PM, Richard Biener <rguenther@suse.de> wrote:
>
> This is my final sweep through the code doing cleanup on-the-fly.
Hi,
Thanks very much for all your help!
>
> I think the code is ready to go now (after you committed your changes).
>
> What I'd eventually like to see is merge the two loop_cand objects
> into a single interchange_cand object, having two really confuses
> me when reading and trying to understand code.  But let's defer this
> for next stage1.
>
> A change that's still required is adjusting the graphite testcases
> to use -floop-nest-optimize (you promised to do that) and to enable
> the interchange pass at -O3 by default.
Yeah, there will be two separated patches, one adjusting graphite
tests and the other enabling at O3 with miscellaneous test change.

>
> With that, can you prepare a patch that merges the changes on the
> branch to trunk and provide updated performance / statistics
> for SPEC (maybe also SPEC compile-time figures with/without the
> patch?  it's enough to look at the Elapsed Compile time numbers in
> the log file IMHO).
Will collect data for the final version.  Thanks again.

Thanks,
bin
>
> Thanks,
> Richard.
>
> 2017-12-05  Richard Biener  <rguenther@suse.de>
>
>         * gimple-loop-interchange.cc (AVG_LOOP_NITER): Remove.
>         (loop_cand::supported_operations): Simplify.
>         (loop_cand::analyze_iloop_reduction_var): Use m_exit.
>         (loop_cand::analyze_oloop_reduction_var): Likewise.
>         (loop_cand::analyze_lcssa_phis): Likewise.
>         (find_deps_in_bb_for_stmt): Use gimple_seq_add_stmt_without_update.
>         (loop_cand::undo_simple_reduction): Likewise, properly release
>         virtual defs.
>         (tree_loop_interchange::interchange_loops): Likewise.  Move code
>         to innner loop here.
>         (tree_loop_interchange::map_inductions_to_loop): Remove code moving
>         code to inner loop.
>         (insert_pos_at_inner_loop): Inline into single caller...
>         (tree_loop_interchange::move_code_to_inner): ...here.  Properly
>         release virtual defs.
>         (proper_loop_form_for_interchange): Properly analyze/instantiate SCEV.
>         (prepare_perfect_loop_nest): Do not explicitely allocate vectors.
>
> Index: gcc/gimple-loop-interchange.cc
> ===================================================================
> --- gcc/gimple-loop-interchange.cc      (revision 255414)
> +++ gcc/gimple-loop-interchange.cc      (working copy)
> @@ -81,8 +81,6 @@ along with GCC; see the file COPYING3.
>  #define MAX_NUM_STMT    (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS))
>  /* Maximum number of data references in loop nest.  */
>  #define MAX_DATAREFS    (PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
> -/* Default average number of loop iterations.  */
> -#define AVG_LOOP_NITER  (PARAM_VALUE (PARAM_AVG_LOOP_NITER))
>
>  /* Comparison ratio of access stride between inner/outer loops to be
>     interchanged.  This is the minimum stride ratio for loop interchange
> @@ -105,7 +103,7 @@ typedef struct induction
>    /* IV's base and step part of SCEV.  */
>    tree base;
>    tree step;
> -}* induction_p;
> +} *induction_p;
>
>  /* Enum type for loop reduction variable.  */
>  enum reduction_type
> @@ -136,7 +134,7 @@ typedef struct reduction
>       reference.  */
>    tree fini_ref;
>    enum reduction_type type;
> -}* reduction_p;
> +} *reduction_p;
>
>
>  /* Dump reduction RE.  */
> @@ -302,24 +300,17 @@ loop_cand::supported_operations (basic_b
>        if (is_gimple_debug (stmt))
>         continue;
>
> -      if (gimple_has_volatile_ops (stmt)
> -         || gimple_has_side_effects (stmt))
> +      if (gimple_has_side_effects (stmt))
>         return false;
>
>        bb_num_stmts++;
> -      if (is_gimple_call (stmt))
> +      if (gcall *call = dyn_cast <gcall *> (stmt))
>         {
> -         int cflags = gimple_call_flags (stmt);
> -         /* Only support const/pure calls.  */
> -         if (!(cflags & (ECF_CONST | ECF_PURE)))
> -           return false;
> -
>           /* In basic block of outer loop, the call should be cheap since
>              it will be moved to inner loop.  */
>           if (iloop != NULL
> -             && !gimple_inexpensive_call_p (as_a <gcall *> (stmt)))
> +             && !gimple_inexpensive_call_p (call))
>             return false;
> -
>           continue;
>         }
>
> @@ -334,6 +325,7 @@ loop_cand::supported_operations (basic_b
>        tree lhs;
>        /* Support loop invariant memory reference if it's only used once by
>          inner loop.  */
> +      /* ???  How's this checking for invariantness?  */
>        if (gimple_assign_single_p (stmt)
>           && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
>           && TREE_CODE (lhs) == SSA_NAME
> @@ -347,7 +339,7 @@ loop_cand::supported_operations (basic_b
>    /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
>       loop's header, or PHI nodes in dest bb of inner loop's exit edge.  */
>    if (!iloop || bb == m_loop->header
> -      || bb == single_dom_exit (iloop->m_loop)->dest)
> +      || bb == iloop->m_exit->dest)
>      return true;
>
>    /* Don't allow any other PHI nodes.  */
> @@ -484,7 +476,6 @@ loop_cand::analyze_iloop_reduction_var (
>    gphi *lcssa_phi = NULL, *use_phi;
>    tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
>    tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
> -  edge e = single_exit (m_loop);
>    reduction_p re;
>    gimple *stmt, *next_def, *single_use = NULL;
>    use_operand_p use_p;
> @@ -571,8 +562,8 @@ loop_cand::analyze_iloop_reduction_var (
>
>        if (use_phi != NULL
>           && lcssa_phi == NULL
> -         && gimple_bb (stmt) == e->dest
> -         && PHI_ARG_DEF_FROM_EDGE (use_phi, e) == next)
> +         && gimple_bb (stmt) == m_exit->dest
> +         && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
>         lcssa_phi = use_phi;
>        else
>         return false;
> @@ -621,7 +612,6 @@ loop_cand::analyze_oloop_reduction_var (
>    gphi *lcssa_phi = NULL, *use_phi;
>    tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
>    tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
> -  edge e = single_exit (m_loop);
>    reduction_p re;
>    gimple *stmt, *next_def;
>    use_operand_p use_p;
> @@ -671,8 +661,8 @@ loop_cand::analyze_oloop_reduction_var (
>
>        if (lcssa_phi == NULL
>           && use_phi != NULL
> -         && gimple_bb (stmt) == e->dest
> -         && PHI_ARG_DEF_FROM_EDGE (use_phi, e) == next)
> +         && gimple_bb (stmt) == m_exit->dest
> +         && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
>         lcssa_phi = use_phi;
>        else
>         return false;
> @@ -781,10 +771,8 @@ loop_cand::analyze_carried_vars (loop_ca
>  bool
>  loop_cand::analyze_lcssa_phis (void)
>  {
> -  edge e = single_exit (m_loop);
>    gphi_iterator gsi;
> -
> -  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
> +  for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
>      {
>        gphi *phi = gsi.phi ();
>
> @@ -810,7 +798,7 @@ loop_cand::analyze_lcssa_phis (void)
>  static void
>  find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
>  {
> -  auto_vec<gimple *> worklist;
> +  auto_vec<gimple *, 4> worklist;
>    use_operand_p use_p;
>    ssa_op_iter iter;
>    gimple *stmt, *def_stmt;
> @@ -845,7 +833,7 @@ find_deps_in_bb_for_stmt (gimple_seq *st
>        if (gimple_plf (stmt, GF_PLF_1))
>         {
>           gsi_remove (&gsi, false);
> -         gimple_seq_add_stmt (stmts, stmt);
> +         gimple_seq_add_stmt_without_update (stmts, stmt);
>         }
>        else
>         gsi_next_nondebug (&gsi);
> @@ -898,7 +886,7 @@ loop_cand::undo_simple_reduction (reduct
>        gimple_set_vuse (re->producer, NULL_TREE);
>        from = gsi_for_stmt (re->producer);
>        gsi_remove (&from, false);
> -      gimple_seq_add_stmt (&stmts, re->producer);
> +      gimple_seq_add_stmt_without_update (&stmts, re->producer);
>        new_var = re->init;
>      }
>    else
> @@ -908,7 +896,7 @@ loop_cand::undo_simple_reduction (reduct
>        /* Because we generate new stmt loading from the MEM_REF to TMP.  */
>        tree tmp = copy_ssa_name (re->var);
>        stmt = gimple_build_assign (tmp, re->init_ref);
> -      gimple_seq_add_stmt (&stmts, stmt);
> +      gimple_seq_add_stmt_without_update (&stmts, stmt);
>
>        /* Init new_var to MEM_REF or CONST depending on if it is the first
>          iteration.  */
> @@ -916,7 +904,7 @@ loop_cand::undo_simple_reduction (reduct
>        tree cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init);
>        new_var = copy_ssa_name (re->var);
>        stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
> -      gimple_seq_add_stmt (&stmts, stmt);
> +      gimple_seq_add_stmt_without_update (&stmts, stmt);
>      }
>    gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);
>
> @@ -932,10 +920,11 @@ loop_cand::undo_simple_reduction (reduct
>      }
>
>    /* Move consumer stmt into inner loop, just after reduction next's def.  */
> +  unlink_stmt_vdef (re->consumer);
> +  release_ssa_name (gimple_vdef (re->consumer));
>    gimple_set_vdef (re->consumer, NULL_TREE);
>    gimple_set_vuse (re->consumer, NULL_TREE);
>    gimple_assign_set_rhs1 (re->consumer, re->next);
> -  update_stmt (re->consumer);
>    from = gsi_for_stmt (re->consumer);
>    to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
>    gsi_move_after (&from, &to);
> @@ -1129,6 +1118,10 @@ tree_loop_interchange::interchange_loops
>    o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
>                                        NULL_TREE, false, GSI_CONTINUE_LINKING);
>
> +  /* Move src's code to tgt loop.  This is necessary when src is the outer
> +     loop and tgt is the inner loop.  */
> +  move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);
> +
>    /* Map outer loop's IV to inner loop, and vice versa.  */
>    map_inductions_to_loop (oloop, iloop);
>    map_inductions_to_loop (iloop, oloop);
> @@ -1137,10 +1130,10 @@ tree_loop_interchange::interchange_loops
>       loop is actually from inner/outer loop.  Also we record the new IV
>       created for the outer loop so that it can be skipped in later loop
>       interchange.  */
> -  create_canonical_iv (oloop.m_loop, single_exit (oloop.m_loop),
> +  create_canonical_iv (oloop.m_loop, oloop.m_exit,
>                        i_niters, &m_niters_iv_var, &var_after);
>    bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
> -  create_canonical_iv (iloop.m_loop, single_exit (iloop.m_loop),
> +  create_canonical_iv (iloop.m_loop, iloop.m_exit,
>                        o_niters, NULL, &var_after);
>    bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
>
> @@ -1151,6 +1144,11 @@ tree_loop_interchange::interchange_loops
>    oloop.m_loop->any_upper_bound = false;
>    oloop.m_loop->any_likely_upper_bound = false;
>    free_numbers_of_iterations_estimates (oloop.m_loop);
> +
> +  /* ???  The association between the loop data structure and the
> +     CFG changed, so what was loop N at the source level is now
> +     loop M.  We should think of retaining the association or breaking
> +     it fully by creating a new loop instead of re-using the "wrong" one.  */
>  }
>
>  /* Map induction variables of SRC loop to TGT loop.  The function firstly
> @@ -1180,14 +1178,8 @@ void
>  tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
>  {
>    induction_p iv;
> -  edge e = single_exit (tgt.m_loop);
> +  edge e = tgt.m_exit;
>    gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;
> -  bool move_code_p = flow_loop_nested_p (src.m_loop, tgt.m_loop);
> -
> -  /* Move src's code to tgt loop.  This is necessary when src is the outer
> -     loop and tgt is the inner loop.  */
> -  if (move_code_p)
> -    move_code_to_inner_loop (src.m_loop, tgt.m_loop, src.m_bbs);
>
>    /* Map source loop's IV to target loop.  */
>    for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
> @@ -1234,26 +1226,6 @@ tree_loop_interchange::map_inductions_to
>      }
>  }
>
> -/* Compute the insert position at inner loop when moving code from outer
> -   loop to inner one.  */
> -
> -static inline void
> -insert_pos_at_inner_loop (struct loop *outer, struct loop *inner,
> -                         basic_block bb, gimple_stmt_iterator *pos)
> -{
> -  /* Move code from header/latch to header/latch.  */
> -  if (bb == outer->header)
> -    *pos = gsi_after_labels (inner->header);
> -  else if (bb == outer->latch)
> -    *pos = gsi_after_labels (inner->latch);
> -  else
> -    {
> -      /* Otherwise, simply move to exit->src.  */
> -      edge e = single_exit (inner);
> -      *pos = gsi_last_bb (e->src);
> -    }
> -}
> -
>  /* Move stmts of outer loop to inner loop.  */
>
>  void
> @@ -1272,7 +1244,15 @@ tree_loop_interchange::move_code_to_inne
>        if (flow_bb_inside_loop_p (inner, bb))
>         continue;
>
> -      insert_pos_at_inner_loop (outer, inner, bb, &to);
> +      /* Move code from header/latch to header/latch.  */
> +      if (bb == outer->header)
> +       to = gsi_after_labels (inner->header);
> +      else if (bb == outer->latch)
> +       to = gsi_after_labels (inner->latch);
> +      else
> +       /* Otherwise, simply move to exit->src.  */
> +       to = gsi_last_bb (single_exit (inner)->src);
> +
>        for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
>         {
>           gimple *stmt = gsi_stmt (gsi);
> @@ -1287,7 +1267,11 @@ tree_loop_interchange::move_code_to_inne
>           if (gimple_vuse (stmt))
>             gimple_set_vuse (stmt, NULL_TREE);
>           if (gimple_vdef (stmt))
> -           gimple_set_vdef (stmt, NULL_TREE);
> +           {
> +             unlink_stmt_vdef (stmt);
> +             release_ssa_name (gimple_vdef (stmt));
> +             gimple_set_vdef (stmt, NULL_TREE);
> +           }
>
>           reset_debug_uses (stmt);
>           gsi_move_before (&gsi, &to);
> @@ -1756,18 +1740,20 @@ proper_loop_form_for_interchange (struct
>    free (bbs);
>
>    tree niters = number_of_latch_executions (loop);
> +  niters = analyze_scalar_evolution (loop_outer (loop), niters);
>    if (!niters || chrec_contains_undetermined (niters))
>      return false;
>
>    /* Record the innermost outer loop that doesn't form rectangle loop nest.  */
> -  for (loop = loop_outer (loop);
> -       loop && flow_loop_nested_p (*min_outer, loop);
> -       loop = loop_outer (loop))
> -    {
> -      niters = instantiate_scev (loop_preheader_edge (loop), loop, niters);
> -      if (!evolution_function_is_invariant_p (niters, loop->num))
> +  for (loop_p loop2 = loop_outer (loop);
> +       loop2 && flow_loop_nested_p (*min_outer, loop2);
> +       loop2 = loop_outer (loop2))
> +    {
> +      niters = instantiate_scev (loop_preheader_edge (loop2),
> +                                loop_outer (loop), niters);
> +      if (!evolution_function_is_invariant_p (niters, loop2->num))
>         {
> -         *min_outer = loop;
> +         *min_outer = loop2;
>           break;
>         }
>      }
> @@ -1968,7 +1954,6 @@ prepare_perfect_loop_nest (struct loop *
>
>    /* Prepare the data reference vector for the loop nest, pruning outer
>       loops we cannot handle.  */
> -  datarefs->create (20);
>    start_loop = prepare_data_references (start_loop, datarefs);
>    if (!start_loop
>        /* Check if there is no data reference.  */
> @@ -1990,9 +1975,7 @@ prepare_perfect_loop_nest (struct loop *
>    /* Check if data dependences can be computed for loop nest starting from
>       start_loop.  */
>    loop = start_loop;
> -  loop_nest->create (3);
>    do {
> -    ddrs->create (20);
>      loop_nest->truncate (0);
>
>      if (loop != start_loop)
> @@ -2015,8 +1998,6 @@ prepare_perfect_loop_nest (struct loop *
>         return true;
>        }
>
> -    /* ???  We should be able to adjust DDRs we already computed by
> -       truncating distance vectors.  */
>      free_dependence_relations (*ddrs);
>      *ddrs = vNULL;
>      /* Try to compute data dependences with the outermost loop stripped.  */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]