This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Add un-distribution to tree-ssa-reassoc (2nd try)


This adds un-distribution capabilities to tree-ssa-reassoc:
a * b + a * c -> a * (b + c)

The SPEC2006 454.calculix benchmark benefits from this and gains 25%
in performance with -O3 --param max-completely-peeled-insns=100000 
-ffast-math.  Of course there are also a number of missed-optimization
PRs around this topic.  SPEC2000 is unaffected by this patch as is
tramp3d or any other benchmark we regularly test.

This version of the patch fixes the compile-time problems of the earlier
one.

Bootstrap and regtest re-running on x86_64-unknown-linux-gnu, ok if
that passes?

Thanks,
Richard.

2008-05-15  Richard Guenther  <rguenther@suse.de>

	PR tree-optimization/15255
	* tree-ssa-reassoc.c (undistribute_ops_list): Perform
	un-distribution of multiplication and division on the
	chain of summands.
	(should_break_up_subtract): Also break up subtracts for
	factors.
	(reassociate_bb): Call undistribute_ops_list.

	* gcc.dg/tree-ssa/reassoc-14.c: New testcase.
	* gcc.dg/tree-ssa/reassoc-15.c: Likewise.
	* gcc.dg/tree-ssa/reassoc-16.c: Likewise.
	* gcc.dg/torture/reassoc-1.c: Likewise.
	* gcc.dg/tree-ssa/recip-2.c: Adjust.
	* gcc.dg/tree-ssa/recip-6.c: Likewise.
	* gcc.dg/tree-ssa/recip-7.c: Likewise.

Index: gcc/tree-ssa-reassoc.c
===================================================================
*** gcc/tree-ssa-reassoc.c.orig	2008-05-15 16:13:10.000000000 +0200
--- gcc/tree-ssa-reassoc.c	2008-05-15 17:19:09.000000000 +0200
*************** eliminate_using_constants (enum tree_cod
*** 722,727 ****
--- 722,901 ----
      }
  }
  
+ /* Perform un-distribution of divisions and multiplications.
+    A * X + B * X is transformed into (A + B) * X and A / X + B / X
+    to (A + B) / X for real X.  */
+ 
+ static void
+ undistribute_ops_list (enum tree_code opcode,
+ 		       VEC (operand_entry_t, heap) **ops, struct loop *loop)
+ {
+   unsigned int length = VEC_length (operand_entry_t, *ops);
+   operand_entry_t oe1, oe2;
+   unsigned i, j;
+   sbitmap candidates;
+   unsigned nr_candidates;
+   sbitmap_iterator sbi0, sbi1;
+ 
+   if (length <= 1
+       || opcode != PLUS_EXPR)
+     return;
+ 
+   /* Build a list of candidates to process.  */
+   candidates = sbitmap_alloc (length);
+   sbitmap_zero (candidates);
+   nr_candidates = 0;
+   for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe1); ++i)
+     {
+       enum tree_code dcode;
+       tree oe1def;
+ 
+       if (TREE_CODE (oe1->op) != SSA_NAME)
+ 	continue;
+       oe1def = SSA_NAME_DEF_STMT (oe1->op);
+       if (TREE_CODE (oe1def) != GIMPLE_MODIFY_STMT)
+ 	continue;
+       dcode = TREE_CODE (GIMPLE_STMT_OPERAND (oe1def, 1));
+       if ((dcode != MULT_EXPR
+ 	   && dcode != RDIV_EXPR)
+ 	  || !is_reassociable_op (oe1def, dcode, loop))
+ 	continue;
+ 
+       SET_BIT (candidates, i);
+       nr_candidates++;
+     }
+ 
+   if (nr_candidates < 2)
+     {
+       sbitmap_free (candidates);
+       return;
+     }
+ 
+   if (dump_file && (dump_flags & TDF_DETAILS))
+     {
+       fprintf (dump_file, "searching for un-distribute opportunities ");
+       print_generic_expr (dump_file,
+ 	VEC_index (operand_entry_t, *ops,
+ 		   sbitmap_first_set_bit (candidates))->op, 0);
+       fprintf (dump_file, " %d\n", nr_candidates);
+     }
+ 
+   EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
+     {
+       enum tree_code dcode;
+       tree oe1def;
+ 
+       oe1 = VEC_index (operand_entry_t, *ops, i);
+       oe1def = SSA_NAME_DEF_STMT (oe1->op);
+       dcode = TREE_CODE (GIMPLE_STMT_OPERAND (oe1def, 1));
+ 
+       EXECUTE_IF_SET_IN_SBITMAP (candidates, i + 1, j, sbi1)
+ 	{
+ 	  tree oe2def;
+ 	  tree op10, op11, op20, op21, tmp, tmpvar, op;
+ 	  block_stmt_iterator bsi;
+ 	  use_operand_p use_p;
+ 	  tree use_stmt;
+ 
+ 	  oe2 = VEC_index (operand_entry_t, *ops, j);
+ 	  oe2def = SSA_NAME_DEF_STMT (oe2->op);
+ 	  if (TREE_CODE (GIMPLE_STMT_OPERAND (oe2def, 1)) != dcode)
+ 	    continue;
+ 
+ 	  op10 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe1def, 1), 0);
+ 	  op11 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe1def, 1), 1);
+ 	  op20 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe2def, 1), 0);
+ 	  op21 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe2def, 1), 1);
+ 	  if (op10 == op20
+ 	      && dcode != RDIV_EXPR)
+ 	    {
+ 	      tmp = op10; op10 = op11; op11 = tmp;
+ 	      tmp = op20; op20 = op21; op21 = tmp;
+ 	    }
+ 	  else if (op10 == op21
+ 		   && dcode != RDIV_EXPR)
+ 	    {
+ 	      tmp = op10; op10 = op11; op11 = tmp;
+ 	    }
+ 	  else if (op11 == op20
+ 		   && dcode != RDIV_EXPR)
+ 	    {
+ 	      tmp = op20; op20 = op21; op21 = tmp;
+ 	    }
+ 	  else if (op11 == op21)
+ 	    ;
+ 	  else
+ 	    continue;
+ 
+ 	  if (dump_file && (dump_flags & TDF_DETAILS))
+ 	    {
+ 	      fprintf (dump_file, "applying reverse distributive law to ");
+ 	      print_generic_expr (dump_file, oe1def, 0);
+ 	      fprintf (dump_file, " and ");
+ 	      print_generic_expr (dump_file, oe2def, 0);
+ 	      fprintf (dump_file, "\n");
+ 	    }
+ 
+ 	  /* Build two new stmts to avoid assigning different values to
+ 	     names than in the original program (for debugging) and to
+ 	     make sure not to break dominator relationships.  */
+ 	  tmpvar = create_tmp_var (TREE_TYPE (op10), NULL);
+ 	  add_referenced_var (tmpvar);
+ 	  if (stmt_dominates_stmt_p (oe1def, oe2def))
+ 	    bsi = bsi_for_stmt (oe2def);
+ 	  else
+ 	    bsi = bsi_for_stmt (oe1def);
+ 
+ 	  tmp = build_gimple_modify_stmt (NULL_TREE,
+ 					  fold_build2 (opcode, TREE_TYPE (op10),
+ 						       op10, op20));
+ 	  op = make_ssa_name (tmpvar, tmp);
+ 	  GIMPLE_STMT_OPERAND (tmp, 0) = op;
+ 	  bsi_insert_before (&bsi, tmp, BSI_SAME_STMT);
+ 	  update_stmt (tmp);
+ 
+ 	  tmp = build_gimple_modify_stmt (NULL_TREE,
+ 					  fold_build2 (dcode, TREE_TYPE (op10),
+ 						       op, op11));
+ 	  op = make_ssa_name (tmpvar, tmp);
+ 	  GIMPLE_STMT_OPERAND (tmp, 0) = op;
+ 	  bsi_insert_before (&bsi, tmp, BSI_SAME_STMT);
+ 	  update_stmt (tmp);
+ 
+ 	  /* Remove the immediate uses of the old stmts and replace one
+ 	     with the new product.  */
+ 	  gcc_assert (has_single_use (oe1->op)
+ 		      && has_single_use (oe2->op));
+ 	  single_imm_use (oe1->op, &use_p, &use_stmt);
+ 	  SET_USE (use_p, op);
+ 	  update_stmt (use_stmt);
+ 	  single_imm_use (oe2->op, &use_p, &use_stmt);
+ 	  SET_USE (use_p,
+ 		   fold_convert (TREE_TYPE (oe2->op), integer_zero_node));
+ 	  update_stmt (use_stmt);
+ 
+ 	  /* Now remove the old stmts defining the summands.  */
+ 	  bsi = bsi_for_stmt (oe1def);
+ 	  bsi_remove (&bsi, true);
+ 	  release_defs (oe1def);
+ 	  bsi = bsi_for_stmt (oe2def);
+ 	  bsi_remove (&bsi, true);
+ 	  release_defs (oe2def);
+ 
+ 	  /* Finally recurse with the operands removed.  */
+ 	  VEC_unordered_remove (operand_entry_t, *ops, j);
+ 	  VEC_unordered_remove (operand_entry_t, *ops, i);
+ 	  add_to_ops_vec (ops, op);
+ 	  sbitmap_free (candidates);
+ 	  if (nr_candidates > 2)
+ 	    undistribute_ops_list (opcode, ops, loop);
+ 	  return;
+ 	}
+     }
+ 
+   sbitmap_free (candidates);
+ }
+ 
  /* Perform various identities and other optimizations on the list of
     operand entries, stored in OPS.  The tree code for the binary
     operation between all the operands is OPCODE.  */
*************** should_break_up_subtract (tree stmt)
*** 1093,1099 ****
  
    if (TREE_CODE (lhs) == SSA_NAME
        && (immusestmt = get_single_immediate_use (lhs))
!       && TREE_CODE (GIMPLE_STMT_OPERAND (immusestmt, 1)) == PLUS_EXPR)
      return true;
    return false;
  
--- 1267,1274 ----
  
    if (TREE_CODE (lhs) == SSA_NAME
        && (immusestmt = get_single_immediate_use (lhs))
!       && (TREE_CODE (GIMPLE_STMT_OPERAND (immusestmt, 1)) == PLUS_EXPR
! 	  || TREE_CODE (GIMPLE_STMT_OPERAND (immusestmt, 1)) == MULT_EXPR))
      return true;
    return false;
  
*************** reassociate_bb (basic_block bb)
*** 1355,1366 ****
--- 1530,1545 ----
  
  	      TREE_VISITED (stmt) = 1;
  	      linearize_expr_tree (&ops, stmt);
+ 
  	      qsort (VEC_address (operand_entry_t, ops),
  		     VEC_length (operand_entry_t, ops),
  		     sizeof (operand_entry_t),
  		     sort_by_operand_rank);
  	      optimize_ops_list (TREE_CODE (rhs), &ops);
  
+ 	      undistribute_ops_list (TREE_CODE (rhs), &ops,
+ 				     loop_containing_stmt (stmt));
+ 
  	      if (VEC_length (operand_entry_t, ops) == 1)
  		{
  		  if (dump_file && (dump_flags & TDF_DETAILS))
Index: gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c	2008-05-15 17:14:18.000000000 +0200
***************
*** 0 ****
--- 1,23 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-tree-reassoc1" } */
+ 
+ int test1 (int x, int y, int z, int weight)
+ {
+   int tmp1 = x * weight;
+   int tmp2 = y * weight;
+   int tmp3 = (x - y) * weight;
+   return tmp1 + (tmp2 + tmp3);
+ }
+ 
+ int test2 (int x, int y, int z, int weight)
+ {
+   int tmp1 = x * weight;
+   int tmp2 = y * weight * weight;
+   int tmp3 = z * weight * weight * weight;
+   return tmp1 + tmp2 + tmp3;
+ }
+ 
+ /* There should be one multiplication left in test1 and three in test2.  */
+ 
+ /* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" } } */
+ /* { dg-final { cleanup-tree-dump "reassoc1" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c	2008-05-15 17:14:18.000000000 +0200
***************
*** 0 ****
--- 1,19 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-tree-reassoc1" } */
+ 
+ int test3 (int x, int y, int z, int weight, int w1, int w2, int w3)
+ {
+   int wtmp1 = w1 * weight;
+   int wtmp2 = w2 * weight;
+   int wtmp3 = w3 * weight;
+   int tmp1 = x * wtmp1;
+   int tmp2 = y * wtmp2;
+   int tmp3 = z * wtmp3;
+   return tmp1 + tmp2 + tmp3;
+ }
+ 
+ /* The multiplication with weight should be un-distributed.
+    ???  This pattern is not recognized currently.  */
+ 
+ /* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" { xfail *-*-* } } } */
+ /* { dg-final { cleanup-tree-dump "reassoc1" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c	2008-05-15 17:14:18.000000000 +0200
***************
*** 0 ****
--- 1,16 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */
+ 
+ double test1 (double x, double y, double z, double weight)
+ {
+   double tmp1 = x / weight;
+   double tmp2 = y / weight;
+   double tmp3 = -x / weight;
+   return tmp1 + tmp2 + tmp3;
+ }
+ 
+ /* The division should be un-distributed and all references to x should
+    be gone.  */
+ 
+ /* { dg-final { scan-tree-dump-times "/" 1 "reassoc1" } } */
+ /* { dg-final { cleanup-tree-dump "reassoc1" } } */
Index: gcc/testsuite/gcc.dg/torture/reassoc-1.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/torture/reassoc-1.c	2008-05-15 17:14:18.000000000 +0200
***************
*** 0 ****
--- 1,24 ----
+ /* { dg-do run } */
+ 
+ int x;
+ 
+ int __attribute__((noinline))
+ foo(int a, int b, int w)
+ {
+   int tmp1 = a * w;
+   int tmp2 = b * w;
+   x = tmp1;
+   return tmp1 + tmp2;
+ }
+ 
+ extern void abort (void);
+ 
+ int main()
+ {
+   if (foo(1, 2, 3) != 9)
+     abort ();
+   if (x != 3)
+     abort ();
+   return 0;
+ }
+ 
Index: gcc/testsuite/gcc.dg/tree-ssa/recip-2.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/recip-2.c.orig	2008-05-14 17:27:08.000000000 +0200
--- gcc/testsuite/gcc.dg/tree-ssa/recip-2.c	2008-05-15 17:14:18.000000000 +0200
***************
*** 1,7 ****
  /* { dg-do compile } */
  /* { dg-options "-O1 -funsafe-math-optimizations -fdump-tree-recip" } */
  
! float e(float a, float b, float c, float d, float e, float f)
  {
    if (a < b)
      {
--- 1,9 ----
  /* { dg-do compile } */
  /* { dg-options "-O1 -funsafe-math-optimizations -fdump-tree-recip" } */
  
! float u, v, w, x, y, z;
! 
! void e(float a, float b, float c, float d, float e, float f)
  {
    if (a < b)
      {
*************** float e(float a, float b, float c, float
*** 20,26 ****
    /* This should not be left as a multiplication.  */
    c = 1 / c;
  
!   return a + b + c + d + e + f;
  }
  
  /* { dg-final { scan-tree-dump-times " / " 2 "recip" } } */
--- 22,33 ----
    /* This should not be left as a multiplication.  */
    c = 1 / c;
  
!   u = a;
!   v = b;
!   w = c;
!   x = d;
!   y = e;
!   z = f;
  }
  
  /* { dg-final { scan-tree-dump-times " / " 2 "recip" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/recip-6.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/recip-6.c.orig	2008-05-14 17:27:08.000000000 +0200
--- gcc/testsuite/gcc.dg/tree-ssa/recip-6.c	2008-05-15 17:14:18.000000000 +0200
***************
*** 5,11 ****
  
  extern int f2();
  
! double f1(double y, double z, double w)
  {
    double b, c, d, e, f;
  
--- 5,13 ----
  
  extern int f2();
  
! double m, n, o;
! 
! void f1(double y, double z, double w)
  {
    double b, c, d, e, f;
  
*************** double f1(double y, double z, double w)
*** 18,24 ****
    e = c / y;
    f = 1 / y;
  
!   return d + e + f;
  }
  
  /* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
--- 20,28 ----
    e = c / y;
    f = 1 / y;
  
!   m = d;
!   n = e;
!   o = f;
  }
  
  /* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/recip-7.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/recip-7.c.orig	2008-05-14 17:27:08.000000000 +0200
--- gcc/testsuite/gcc.dg/tree-ssa/recip-7.c	2008-05-15 17:14:18.000000000 +0200
***************
*** 5,11 ****
  
  extern double h();
  
! double f(int x, double z, double w)
  {
    double b, c, d, e, f;
    double y = h ();
--- 5,13 ----
  
  extern double h();
  
! double m, n, o;
! 
! void f(int x, double z, double w)
  {
    double b, c, d, e, f;
    double y = h ();
*************** double f(int x, double z, double w)
*** 19,25 ****
    e = c / y;
    f = 1 / y;
  
!   return d + e + f;
  }
  
  /* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
--- 21,29 ----
    e = c / y;
    f = 1 / y;
  
!   m = d;
!   n = e;
!   o = f;
  }
  
  /* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]