This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [Patch] Improving jump-thread pass for PR 54742


On Tue, Nov 11, 2014 at 01:14:04AM +0000, Sebastian Pop wrote:
> Hi Jeff,
> 
> I have adapted the code generation part from James' patch to current trunk, and
> the resulting patch gets the 30% speedup on coremark and passes bootstrap of GCC.

For what it is worth, I've bootstrapped and tested this patch on
aarch64-none-linux-gnu and arm-none-linux-gnueabi with no issues, and
both targets get the expected speedup in the interesting benchmark.
I've also thrown some of the larger popular benchmark suites at it, and
they've compiled and run without any compilation issues or miscompares.

I'm happy to help out with the testing and bug-triaging effort once this
patch goes in.

Some very shallow comments: you should document the new parameters
in doc/invoke.texi and you ought to run contrib/check_GNU_style.sh
on the patch and clean up the coding style issues it highlights.

Thanks,
James Greenhalgh

> diff --git a/gcc/params.def b/gcc/params.def
> index d2d2add..749f962 100644
> --- a/gcc/params.def
> +++ b/gcc/params.def
> @@ -123,6 +123,25 @@ DEFPARAM (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY,
>  	  "Maximum probability of the entry BB of split region (in percent relative to entry BB of the function) to make partial inlining happen",
>  	  70, 0, 0)
>  
> +/* Maximum number of instructions to copy when duplicating blocks
> +   on a jump thread path.  */
> +DEFPARAM (PARAM_MAX_THREAD_PATH_INSNS,
> +	  "max-thread-path-insns",
> +	  "Maximum number of instructions to copy when duplicating blocks on a jump thread path",
> +	  100, 1, 999999)
> +
> +/* Maximum length of a jump thread path.  */
> +DEFPARAM (PARAM_MAX_THREAD_LENGTH,
> +	  "max-thread-length",
> +	  "Maximum number of basic blocks on a jump thread path",
> +	  10, 1, 999999)
> +
> +/* Maximum number of jump thread paths to duplicate.  */
> +DEFPARAM (PARAM_MAX_THREAD_PATHS,
> +	  "max-thread-paths",
> +	  "Maximum number of new jump thread paths to create",
> +	  50, 1, 999999)
> +
>  /* Limit the number of expansions created by the variable expansion
>     optimization to avoid register pressure.  */
>  DEFPARAM (PARAM_MAX_VARIABLE_EXPANSIONS,
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
> new file mode 100644
> index 0000000..f3ef725
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
> @@ -0,0 +1,32 @@
> +int sum0, sum1, sum2, sum3;
> +int foo(char * s, char** ret)
> +{
> +  int state=0;
> +  char c;
> +
> +  for (; *s && state != 4; s++)
> +    {
> +      c = *s;
> +      if (c == '*')
> +	{
> +	  s++;
> +	  break;
> +	}
> +      switch (state) {
> +	case 0:
> +	  if (c == '+') state = 1;
> +	  else if (c != '-') sum0+=c;
> +	  break;
> +	case 1:
> +	  if (c == '+') state = 2;
> +	  else if (c == '-') state = 0;
> +	  else sum1+=c;
> +	  break;
> +	default:
> +	  break;
> +      }
> +
> +    }
> +  *ret = s;
> +  return state;
> +}
> diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
> index ee10bc6..565cfe3 100644
> --- a/gcc/tree-cfg.c
> +++ b/gcc/tree-cfg.c
> @@ -5949,10 +5949,12 @@ gimple_duplicate_sese_region (edge entry, edge exit,
>  {
>    unsigned i;
>    bool free_region_copy = false, copying_header = false;
> +  bool save_loop_details = false;
>    struct loop *loop = entry->dest->loop_father;
>    edge exit_copy;
>    vec<basic_block> doms;
>    edge redirected;
> +  int memo_loop_header_no = 0, memo_loop_latch_no = 0;
>    int total_freq = 0, entry_freq = 0;
>    gcov_type total_count = 0, entry_count = 0;
>  
> @@ -5970,9 +5972,15 @@ gimple_duplicate_sese_region (edge entry, edge exit,
>        if (region[i]->loop_father != loop)
>  	return false;
>  
> -      if (region[i] != entry->dest
> -	  && region[i] == loop->header)
> -	return false;
> +      /* If we are copying an abnormally shaped region, keep track of
> +	 which block will become our loop header.  */
> +      if ((region[i] != entry->dest && region[i] == loop->header)
> +	  || (region[i] != entry->src && region[i] == loop->latch))
> +	{
> +	  save_loop_details = true;
> +	  memo_loop_latch_no = i;
> +	  memo_loop_header_no = i;
> +	}
>      }
>  
>    /* In case the function is used for loop header copying (which is the primary
> @@ -6055,6 +6063,13 @@ gimple_duplicate_sese_region (edge entry, edge exit,
>        loop->latch = exit->src;
>      }
>  
> +  /* Restore loop details if we were asked to save them.  */
> +  if (save_loop_details)
> +    {
> +      loop->header = region_copy[memo_loop_header_no];
> +      loop->latch = region_copy[memo_loop_latch_no];
> +    }
> +
>    /* Redirect the entry and add the phi node arguments.  */
>    redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
>    gcc_assert (redirected != NULL);
> diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
> index d5b9696..de9b3fe 100644
> --- a/gcc/tree-ssa-threadedge.c
> +++ b/gcc/tree-ssa-threadedge.c
> @@ -56,6 +56,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "params.h"
>  #include "tree-ssa-threadedge.h"
>  #include "builtins.h"
> +#include "cfg.h"
> +#include "cfganal.h"
>  
>  /* To avoid code explosion due to jump threading, we limit the
>     number of statements we are going to copy.  This variable
> @@ -660,6 +662,7 @@ simplify_control_stmt_condition (edge e,
>       rather than use a relational operator.  These are simpler to handle.  */
>    if (TREE_CODE (cond) == SSA_NAME)
>      {
> +      tree original_lhs = cond;
>        cached_lhs = cond;
>  
>        /* Get the variable's current value from the equivalence chains.
> @@ -688,6 +691,12 @@ simplify_control_stmt_condition (edge e,
>  	 pass specific callback to try and simplify it further.  */
>        if (cached_lhs && ! is_gimple_min_invariant (cached_lhs))
>          cached_lhs = (*simplify) (stmt, stmt);
> +
> +      /* We couldn't find an invariant.  But, callers of this
> +	 function may be able to do something useful with the
> +	 unmodified destination.  */
> +      if (!cached_lhs)
> +	cached_lhs = original_lhs;
>      }
>    else
>      cached_lhs = NULL;
> @@ -947,6 +956,172 @@ thread_around_empty_blocks (edge taken_edge,
>    return false;
>  }
>  
> +/* Return true if there is at least one path from START_BB to END_BB.
> +   VISITED_BBS is used to make sure we don't fall into an infinite loop.  */
> +
> +static bool
> +fsm_find_thread_path (basic_block start_bb, basic_block end_bb,
> +		      vec<basic_block, va_gc> *&path,
> +		      hash_set<basic_block> *visited_bbs, int n_insns)
> +{
> +  if (start_bb == end_bb)
> +    {
> +      vec_safe_push (path, start_bb);
> +      return true;
> +    }
> +
> +  if (!visited_bbs->add(start_bb))
> +    {
> +      edge e;
> +      edge_iterator ei;
> +      FOR_EACH_EDGE (e, ei, start_bb->succs)
> +	if (fsm_find_thread_path (e->dest, end_bb, path, visited_bbs, n_insns))
> +	  {
> +	    vec_safe_push (path, start_bb);
> +	    return true;
> +	  }
> +    }
> +
> +  return false;
> +}
> +
> +static int max_threaded_paths;
> +
> +/* We trace the value of the variable EXPR back through any phi nodes looking
> +   for places where it gets a constant value and save the path.  Stop after
> +   having recorded MAX_PATHS jump threading paths.  */
> +
> +static void
> +fsm_find_control_statement_thread_paths (tree expr,
> +					 hash_set<gimple> *visited_phis,
> +					 vec<basic_block, va_gc> *&path)
> +{
> +  tree var = SSA_NAME_VAR (expr);
> +  gimple def_stmt = SSA_NAME_DEF_STMT (expr);
> +  basic_block var_bb = gimple_bb (def_stmt);
> +
> +  if (var == NULL || var_bb == NULL)
> +    return;
> +
> +  vec<basic_block, va_gc> *next_path;
> +  vec_alloc (next_path, n_basic_blocks_for_fn (cfun));
> +
> +  basic_block last_bb_in_path = path->last ();
> +
> +  /* Put the path from var_bb to last_bb_in_path into next_path.  */
> +  if (var_bb != last_bb_in_path)
> +    {
> +      edge e;
> +      int e_count = 0;
> +      edge_iterator ei;
> +
> +      FOR_EACH_EDGE (e, ei, last_bb_in_path->preds)
> +	{
> +	  hash_set<basic_block> *visited_bbs = new hash_set<basic_block>;
> +
> +	  if (fsm_find_thread_path (var_bb, e->src, next_path, visited_bbs, 0))
> +	    ++e_count;
> +
> +	  delete visited_bbs;
> +
> +	  /* If there is more than one path, stop.  */
> +	  if (e_count > 1)
> +	    {
> +	      vec_free (next_path);
> +	      return;
> +	    }
> +	}
> +    }
> +
> +  /* Visit PHI nodes once.  */
> +  if (gimple_code (def_stmt) != GIMPLE_PHI
> +      || visited_phis->add(def_stmt))
> +    {
> +      vec_free (next_path);
> +      return;
> +    }
> +
> +  /* Append all the nodes from next_path to path.  */
> +  vec_safe_splice (path, next_path);
> +  gcc_assert (path->last () == var_bb);
> +
> +  /* Iterate over the arguments of PHI.  */
> +  unsigned int i;
> +  for (i = 0; i < gimple_phi_num_args (def_stmt); i++)
> +    {
> +      tree arg = gimple_phi_arg_def (def_stmt, i);
> +      basic_block bbi = gimple_phi_arg_edge (def_stmt, i)->src;
> +
> +      /* Skip edges pointing outside the current loop.  */
> +      if (!arg || var_bb->loop_father != bbi->loop_father)
> +	continue;
> +
> +      /* Add BBI to the path.  */
> +      vec_safe_push (path, bbi);
> +
> +      if (TREE_CODE (arg) == INTEGER_CST)
> +	{
> +	  int j, n = path->length ();
> +	  vec<jump_thread_edge *> *jump_thread_path
> +	    = new vec<jump_thread_edge *> ();
> +	  int joiners = 0;
> +
> +	  for (j = 0; j < n - 1; j++)
> +	    {
> +	      edge e = find_edge ((*path)[n - j - 1],
> +				  (*path)[n - j - 2]);
> +	      gcc_assert (e);
> +	      enum jump_thread_edge_type kind;
> +
> +	      if (j == 0)
> +		kind = EDGE_START_FSM_THREAD;
> +	      else if (single_pred_p (e->src))
> +		kind = EDGE_NO_COPY_SRC_BLOCK;
> +	      else {
> +		kind = EDGE_COPY_SRC_JOINER_BLOCK;
> +		++joiners;
> +	      }
> +
> +	      jump_thread_edge *x = new jump_thread_edge (e, kind);
> +	      jump_thread_path->safe_push (x);
> +	    }
> +
> +	  /* Add the edge taken when the control variable has value ARG.  */
> +	  edge taken_edge = find_taken_edge ((*path)[0], arg);
> +	  jump_thread_edge *x
> +	    = new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK);
> +	  jump_thread_path->safe_push (x);
> +
> +	  /* A path with less than 3 nodes should not be jump-threaded.  */
> +	  if (n > 2 && n < PARAM_VALUE (PARAM_MAX_THREAD_LENGTH)
> +	      && max_threaded_paths > 0)
> +	    {
> +	      int n_insns = 0;
> +	      gimple_stmt_iterator gsi;
> +
> +	      for (j = 1; j < n - 1; j++)
> +		for (gsi = gsi_start_bb ((*path)[j]); !gsi_end_p (gsi); gsi_next (&gsi))
> +		  ++n_insns;
> +
> +	      if (n_insns < PARAM_VALUE (PARAM_MAX_THREAD_PATH_INSNS))
> +		{
> +		  register_jump_thread (jump_thread_path);
> +		  --max_threaded_paths;
> +		}
> +	    }
> +	}
> +      else if (TREE_CODE (arg) == SSA_NAME)
> +	fsm_find_control_statement_thread_paths (arg, visited_phis, path);
> +
> +      /* Remove BBI from the path.  */
> +      path->pop ();
> +    }
> +
> +  /* Remove all the nodes that we added from next_path.  */
> +  vec_safe_truncate (path, (path->length () - next_path->length ()));
> +  vec_free (next_path);
> +}
> +
>  /* We are exiting E->src, see if E->dest ends with a conditional
>     jump which has a known value when reached via E.
>  
> @@ -1032,7 +1207,10 @@ thread_through_normal_block (edge e,
>        cond = simplify_control_stmt_condition (e, stmt, dummy_cond, simplify,
>  					      handle_dominating_asserts);
>  
> -      if (cond && is_gimple_min_invariant (cond))
> +      if (!cond)
> +	return 0;
> +
> +      if (is_gimple_min_invariant (cond))
>  	{
>  	  edge taken_edge = find_taken_edge (e->dest, cond);
>  	  basic_block dest = (taken_edge ? taken_edge->dest : NULL);
> @@ -1078,6 +1256,26 @@ thread_through_normal_block (edge e,
>  				      backedge_seen_p);
>  	  return 1;
>  	}
> +
> +      if (TREE_CODE (cond) != SSA_NAME
> +	  || e->dest->loop_father != e->src->loop_father)
> +	return 0;
> +
> +      /* When COND cannot be simplified, try to find paths from a control
> +	 statement back through the PHI nodes which would affect that control
> +	 statement.  */
> +      vec<basic_block, va_gc> *bb_path;
> +      vec_alloc (bb_path, n_basic_blocks_for_fn (cfun));
> +      vec_safe_push (bb_path, e->dest);
> +      hash_set<gimple> *visited_phis = new hash_set<gimple>;
> +
> +      max_threaded_paths = PARAM_VALUE (PARAM_MAX_THREAD_PATHS);
> +      fsm_find_control_statement_thread_paths (cond, visited_phis, bb_path);
> +
> +      delete visited_phis;
> +      vec_free (bb_path);
> +
> +      return -1;
>      }
>    return 0;
>  }
> diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c
> index 151ed83..5847078 100644
> --- a/gcc/tree-ssa-threadupdate.c
> +++ b/gcc/tree-ssa-threadupdate.c
> @@ -167,8 +167,9 @@ dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
>  		       bool registering)
>  {
>    fprintf (dump_file,
> -	   "  %s jump thread: (%d, %d) incoming edge; ",
> +	   "  %s%s jump thread: (%d, %d) incoming edge; ",
>  	   (registering ? "Registering" : "Cancelling"),
> +	   (path[0]->type == EDGE_START_FSM_THREAD ? " FSM": ""),
>  	   path[0]->e->src->index, path[0]->e->dest->index);
>  
>    for (unsigned int i = 1; i < path.length (); i++)
> @@ -2343,6 +2344,55 @@ thread_through_all_blocks (bool may_peel_loop_headers)
>    threaded_blocks = BITMAP_ALLOC (NULL);
>    memset (&thread_stats, 0, sizeof (thread_stats));
>  
> +  for (i = 0; i < paths.length (); )
> +    {
> +      vec<jump_thread_edge *> *path = paths[i];
> +      edge entry = (*path)[0]->e;
> +
> +      if ((*path)[0]->type != EDGE_START_FSM_THREAD
> +	  /* Do not jump-thread twice from the same block.  */
> +	  || bitmap_bit_p (threaded_blocks, entry->src->index)) {
> +	i++;
> +	continue;
> +      }
> +
> +      unsigned len = path->length ();
> +      edge exit = (*path)[len - 1]->e;
> +      basic_block *region = XNEWVEC (basic_block, len - 1);
> +
> +      for (unsigned int j = 0; j < len - 1; j++)
> +	region[j] = (*path)[j]->e->dest;
> +
> +      bool success = gimple_duplicate_sese_region (entry, exit, region,
> +						   len - 1, NULL, 0);
> +      delete_jump_thread_path (path);
> +      paths.unordered_remove (i);
> +
> +      if (success) {
> +	/* We do not update dominance info.  */
> +	free_dominance_info (CDI_DOMINATORS);
> +
> +	bitmap_set_bit (threaded_blocks, entry->src->index);
> +      }
> +    }
> +
> +  for (i = 0; i < paths.length (); )
> +    {
> +      vec<jump_thread_edge *> *path = paths[i];
> +      edge entry = (*path)[0]->e;
> +
> +      /* Do not jump-thread twice from the same block.  */
> +      if (bitmap_bit_p (threaded_blocks, entry->src->index))
> +	{
> +	  delete_jump_thread_path (path);
> +	  paths.unordered_remove (i);
> +	}
> +      else
> +	i++;
> +    }
> +
> +  bitmap_clear (threaded_blocks);
> +
>    mark_threaded_blocks (threaded_blocks);
>  
>    initialize_original_copy_tables ();
> diff --git a/gcc/tree-ssa-threadupdate.h b/gcc/tree-ssa-threadupdate.h
> index 426aca5..42c3a9e 100644
> --- a/gcc/tree-ssa-threadupdate.h
> +++ b/gcc/tree-ssa-threadupdate.h
> @@ -26,6 +26,7 @@ extern bool thread_through_all_blocks (bool);
>  enum jump_thread_edge_type
>  {
>    EDGE_START_JUMP_THREAD,
> +  EDGE_START_FSM_THREAD,
>    EDGE_COPY_SRC_BLOCK,
>    EDGE_COPY_SRC_JOINER_BLOCK,
>    EDGE_NO_COPY_SRC_BLOCK
> -- 
> 2.1.0.243.g30d45f7


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]