
Re: [PATCH][6/n] Merge from match-and-simplify, make forwprop fold all stmts


On Fri, 24 Oct 2014, Jeff Law wrote:

> On 10/24/14 07:16, Richard Biener wrote:
> > 
> > This patch makes GIMPLE forwprop fold all statements, following
> > single-use SSA edges only (as suggested by Jeff, and certainly
> > the approach that will regress the least until we replace the
> > manual simplification code that does not restrict itself this way).
> > 
> > forwprop is run up to 4 times at the moment (only once for -Og,
> > not at all for -O0), which still seems reasonable.  IMHO the
> > forwprop pass immediately after inlining is somewhat superfluous;
> > it was added there just for its ADDR_EXPR propagation.  We should
> > eventually split this pass into two.
> > 
> > Note that folding only the statements we propagated into (like the
> > SSA propagators do during their substitute-and-fold phase) will
> > miss cases where we propagate into a stmt feeding the one we could
> > simplify.  Unless we always fold all single-use stmts (and their
> > use stmts) we have to fold everything from time to time.  Changing
> > how / when we fold stuff is certainly something to look into now
> > that fold_stmt is able to follow SSA edges.
> > 
> > Bootstrapped on x86_64-unknown-linux-gnu, testing still in progress.
> > 
> > From earlier testing I remember I need to adjust a few testcases
> > that don't expect the early folding - notably two strlenopt cases
> > (previously XFAILed but then PASSing again).
> > 
> > I also expect to massage the single-use heuristic as I get to
> > merging to trunk the patterns I added for the various manual
> > forwprop pattern matchers (a lot of them do not restrict
> > themselves this way).
> > 
> > Does this otherwise look ok?
> > 
> > Thanks,
> > Richard.
> > 
> > 2014-10-24  Richard Biener  <rguenther@suse.de>
> > 
> > 	* tree-ssa-forwprop.c: Include tree-cfgcleanup.h and tree-into-ssa.h.
> > 	(lattice): New global.
> > 	(fwprop_ssa_val): New function.
> > 	(fold_all_stmts): Likewise.
> > 	(pass_forwprop::execute): Finally fold all stmts.
> Seems reasonable.  After all, we can iterate on the single-use heuristic.
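
As an illustration of the point above about propagating into a stmt
feeding the one we could simplify, consider a hypothetical C fragment
(function and variable names are made up; this is not from the patch
or its testsuite):

  /* Hypothetical fragment, not from the patch: propagating mask's
     constant into the definition of t folds it to t = 0, but the
     return statement only folds to the constant 1 if we also fold
     t's single use.  Folding just the statement we propagated into
     would miss that second simplification.  */
  int
  f (int a)
  {
    int mask = 0;
    int t = a & mask;   /* after propagation: a & 0, folds to 0 */
    return t + 1;       /* with t known to be 0, folds to return 1 */
  }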

The following is what I applied - two testcases needed adjustments.
The forwprop-6.c one already notes in its comments that the transform
we look for happens during CCP1.  strlenopt-8.c fails to optimize a
strlen because its analysis cannot handle 2-byte loads/stores, and we
now inline-expand the 2-byte memcpy earlier.
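
For reference, the shape of the strlenopt-8.c problem as a
hypothetical sketch (the real testcase is more involved): once the
length of the destination string is known, strcat of a one-character
literal is turned into a memcpy of the two bytes "a\0", and on
non-strict-align targets that 2-byte memcpy is now inline-expanded
into a short typed store which the strlen analysis cannot see through.

  /* Hypothetical fragment in the spirit of strlenopt-8.c, not the
     actual testcase.  */
  #include <string.h>

  char buf[32];

  void
  g (void)
  {
    strcpy (buf, "x");   /* the string length in buf is now known: 1 */
    strcat (buf, "a");   /* becomes memcpy (buf + 1, "a", 2); on
                            non-strict-align targets the 2-byte copy
                            is now inline-expanded into a short typed
                            load / store that the strlen pass cannot
                            analyze */
  }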

Thanks,
Richard.

2014-10-27  Richard Biener  <rguenther@suse.de>

	* tree-ssa-forwprop.c: Include tree-cfgcleanup.h and tree-into-ssa.h.
	(lattice): New global.
	(fwprop_ssa_val): New function.
	(fold_all_stmts): Likewise.
	(pass_forwprop::execute): Finally fold all stmts.

	* gcc.dg/tree-ssa/forwprop-6.c: Scan ccp1 dump instead.
	* gcc.dg/strlenopt-8.c: Adjust and XFAIL for non_strict_align
	target due to memcpy inline-expansion.

Index: gcc/tree-ssa-forwprop.c
===================================================================
*** gcc/tree-ssa-forwprop.c.orig	2014-10-20 14:39:23.998954586 +0200
--- gcc/tree-ssa-forwprop.c	2014-10-27 12:07:48.489693195 +0100
*************** along with GCC; see the file COPYING3.
*** 54,59 ****
--- 54,61 ----
  #include "tree-ssa-propagate.h"
  #include "tree-ssa-dom.h"
  #include "builtins.h"
+ #include "tree-cfgcleanup.h"
+ #include "tree-into-ssa.h"
  
  /* This pass propagates the RHS of assignment statements into use
     sites of the LHS of the assignment.  It's basically a specialized
*************** simplify_mult (gimple_stmt_iterator *gsi
*** 3586,3591 ****
--- 3588,3680 ----
  
    return false;
  }
+ 
+ 
+ /* Const-and-copy lattice for fold_all_stmts.  */
+ static vec<tree> lattice;
+ 
+ /* Primitive "lattice" function for gimple_simplify.  */
+ 
+ static tree
+ fwprop_ssa_val (tree name)
+ {
+   /* First valueize NAME.  */
+   if (TREE_CODE (name) == SSA_NAME
+       && SSA_NAME_VERSION (name) < lattice.length ())
+     {
+       tree val = lattice[SSA_NAME_VERSION (name)];
+       if (val)
+ 	name = val;
+     }
+   /* If NAME is not the only use signal we don't want to continue
+      matching into its definition.  */
+   if (TREE_CODE (name) == SSA_NAME
+       && !has_single_use (name))
+     return NULL_TREE;
+   return name;
+ }
+ 
+ /* Fold all stmts using fold_stmt following only single-use chains
+    and using a simple const-and-copy lattice.  */
+ 
+ static bool
+ fold_all_stmts (struct function *fun)
+ {
+   bool cfg_changed = false;
+ 
+   /* Combine stmts with the stmts defining their operands.  Do that
+      in an order that guarantees visiting SSA defs before SSA uses.  */
+   lattice.create (num_ssa_names);
+   lattice.quick_grow_cleared (num_ssa_names);
+   int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (fun));
+   int postorder_num = inverted_post_order_compute (postorder);
+   for (int i = 0; i < postorder_num; ++i)
+     {
+       basic_block bb = BASIC_BLOCK_FOR_FN (fun, postorder[i]);
+       for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ 	   !gsi_end_p (gsi); gsi_next (&gsi))
+ 	{
+ 	  gimple stmt = gsi_stmt (gsi);
+ 	  gimple orig_stmt = stmt;
+ 
+ 	  if (fold_stmt (&gsi, fwprop_ssa_val))
+ 	    {
+ 	      stmt = gsi_stmt (gsi);
+ 	      if (maybe_clean_or_replace_eh_stmt (orig_stmt, stmt)
+ 		  && gimple_purge_dead_eh_edges (bb))
+ 		cfg_changed = true;
+ 	      /* Cleanup the CFG if we simplified a condition to
+ 	         true or false.  */
+ 	      if (gimple_code (stmt) == GIMPLE_COND
+ 		  && (gimple_cond_true_p (stmt)
+ 		      || gimple_cond_false_p (stmt)))
+ 		cfg_changed = true;
+ 	      update_stmt (stmt);
+ 	    }
+ 
+ 	  /* Fill up the lattice.  */
+ 	  if (gimple_assign_single_p (stmt))
+ 	    {
+ 	      tree lhs = gimple_assign_lhs (stmt);
+ 	      tree rhs = gimple_assign_rhs1 (stmt);
+ 	      if (TREE_CODE (lhs) == SSA_NAME)
+ 		{
+ 		  if (TREE_CODE (rhs) == SSA_NAME)
+ 		    lattice[SSA_NAME_VERSION (lhs)] = fwprop_ssa_val (rhs);
+ 		  else if (is_gimple_min_invariant (rhs))
+ 		    lattice[SSA_NAME_VERSION (lhs)] = rhs;
+ 		  else
+ 		    lattice[SSA_NAME_VERSION (lhs)] = lhs;
+ 		}
+ 	    }
+ 	}
+     }
+   free (postorder);
+   lattice.release ();
+ 
+   return cfg_changed;
+ }
+ 
  /* Main entry point for the forward propagation and statement combine
     optimizer.  */
  
*************** pass_forwprop::execute (function *fun)
*** 3876,3881 ****
--- 3965,3973 ----
  	}
      }
  
+   /* At the end fold all statements.  */
+   cfg_changed |= fold_all_stmts (fun);
+ 
    if (cfg_changed)
      todoflags |= TODO_cleanup_cfg;
  
Index: gcc/testsuite/gcc.dg/strlenopt-8.c
===================================================================
*** gcc/testsuite/gcc.dg/strlenopt-8.c.orig	2014-08-08 11:26:25.431994867 +0200
--- gcc/testsuite/gcc.dg/strlenopt-8.c	2014-10-27 12:14:45.335664496 +0100
*************** main ()
*** 43,50 ****
    return 0;
  }
  
! /* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen" } } */
! /* { dg-final { scan-tree-dump-times "memcpy \\(" 4 "strlen" } } */
  /* { dg-final { scan-tree-dump-times "strcpy \\(" 0 "strlen" } } */
  /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */
  /* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */
--- 43,55 ----
    return 0;
  }
  
! /* On non-strict-align targets we inline the memcpy that strcat is turned
!    into and end up with a short typed load / store which strlenopt is not
!    able to analyze.  */
! 
! /* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen" { xfail non_strict_align } } } */
! /* { dg-final { scan-tree-dump-times "memcpy \\(" 2 "strlen" { target { non_strict_align } } } } */
! /* { dg-final { scan-tree-dump-times "memcpy \\(" 4 "strlen" { target { ! non_strict_align } } } }  */
  /* { dg-final { scan-tree-dump-times "strcpy \\(" 0 "strlen" } } */
  /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */
  /* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/forwprop-6.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/forwprop-6.c.orig	2013-08-27 11:13:21.638029011 +0200
--- gcc/testsuite/gcc.dg/tree-ssa/forwprop-6.c	2014-10-27 12:19:55.765643123 +0100
***************
*** 1,5 ****
  /* { dg-do compile } */
! /* { dg-options "-O2 -fdump-tree-forwprop1 -W -Wall" } */
  #if (__SIZEOF_INT__ == __SIZEOF_FLOAT__)
  typedef int intflt;
  #elif (__SIZEOF_LONG__ == __SIZEOF_FLOAT__)
--- 1,5 ----
  /* { dg-do compile } */
! /* { dg-options "-O2 -fdump-tree-ccp1 -W -Wall" } */
  #if (__SIZEOF_INT__ == __SIZEOF_FLOAT__)
  typedef int intflt;
  #elif (__SIZEOF_LONG__ == __SIZEOF_FLOAT__)
*************** void f(void)
*** 24,28 ****
     it to be valid.  Then we might as well handle the situation by
     value-numbering, removing the load altogether.
     ???  We now do this after CPP re-writes a into SSA form.  */
! /* { dg-final { scan-tree-dump-times "VIEW_CONVERT_EXPR" 1 "forwprop1" } } */
! /* { dg-final { cleanup-tree-dump "forwprop1" } } */
--- 24,28 ----
     it to be valid.  Then we might as well handle the situation by
     value-numbering, removing the load altogether.
     ???  We now do this after CPP re-writes a into SSA form.  */
! /* { dg-final { scan-tree-dump-times "VIEW_CONVERT_EXPR" 1 "ccp1" } } */
! /* { dg-final { cleanup-tree-dump "ccp1" } } */

