This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Add un-distribution to tree-ssa-reassoc (2nd try)
- From: Richard Guenther <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 15 May 2008 17:28:18 +0200 (CEST)
- Subject: [PATCH] Add un-distribution to tree-ssa-reassoc (2nd try)
This adds un-distribution capabilities to tree-ssa-reassoc:
a * b + a * c -> a * (b + c)
The SPEC2006 454.calculix benchmark benefits from this and gains 25%
in performance with -O3 --param max-completely-peeled-insns=100000
-ffast-math. Of course there are also a number of missed-optimization
PRs around this topic. SPEC2000 is unaffected by this patch as is
tramp3d or any other benchmark we regularly test.
This version of the patch fixes the compile-time problems of the earlier
one.
Bootstrap and regtest re-running on x86_64-unknown-linux-gnu, ok if
that passes?
Thanks,
Richard.
2008-05-15 Richard Guenther <rguenther@suse.de>
PR tree-optimization/15255
* tree-ssa-reassoc.c (undistribute_ops_list): Perform
un-distribution of multiplication and division on the
chain of summands.
(should_break_up_subtract): Also break up subtracts for
factors.
(reassociate_bb): Call undistribute_ops_list.
* gcc.dg/tree-ssa/reassoc-14.c: New testcase.
* gcc.dg/tree-ssa/reassoc-15.c: Likewise.
* gcc.dg/tree-ssa/reassoc-16.c: Likewise.
* gcc.dg/torture/reassoc-1.c: Likewise.
* gcc.dg/tree-ssa/recip-2.c: Adjust.
* gcc.dg/tree-ssa/recip-6.c: Likewise.
* gcc.dg/tree-ssa/recip-7.c: Likewise.
Index: gcc/tree-ssa-reassoc.c
===================================================================
*** gcc/tree-ssa-reassoc.c.orig 2008-05-15 16:13:10.000000000 +0200
--- gcc/tree-ssa-reassoc.c 2008-05-15 17:19:09.000000000 +0200
*************** eliminate_using_constants (enum tree_cod
*** 722,727 ****
--- 722,901 ----
}
}
+ /* Perform un-distribution of divisions and multiplications.
+ A * X + B * X is transformed into (A + B) * X and A / X + B / X
+ to (A + B) / X for real X. */
+
+ static void
+ undistribute_ops_list (enum tree_code opcode,
+ VEC (operand_entry_t, heap) **ops, struct loop *loop)
+ {
+ unsigned int length = VEC_length (operand_entry_t, *ops);
+ operand_entry_t oe1, oe2;
+ unsigned i, j;
+ sbitmap candidates;
+ unsigned nr_candidates;
+ sbitmap_iterator sbi0, sbi1;
+
+ if (length <= 1
+ || opcode != PLUS_EXPR)
+ return;
+
+ /* Build a list of candidates to process. */
+ candidates = sbitmap_alloc (length);
+ sbitmap_zero (candidates);
+ nr_candidates = 0;
+ for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe1); ++i)
+ {
+ enum tree_code dcode;
+ tree oe1def;
+
+ if (TREE_CODE (oe1->op) != SSA_NAME)
+ continue;
+ oe1def = SSA_NAME_DEF_STMT (oe1->op);
+ if (TREE_CODE (oe1def) != GIMPLE_MODIFY_STMT)
+ continue;
+ dcode = TREE_CODE (GIMPLE_STMT_OPERAND (oe1def, 1));
+ if ((dcode != MULT_EXPR
+ && dcode != RDIV_EXPR)
+ || !is_reassociable_op (oe1def, dcode, loop))
+ continue;
+
+ SET_BIT (candidates, i);
+ nr_candidates++;
+ }
+
+ if (nr_candidates < 2)
+ {
+ sbitmap_free (candidates);
+ return;
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "searching for un-distribute opportunities ");
+ print_generic_expr (dump_file,
+ VEC_index (operand_entry_t, *ops,
+ sbitmap_first_set_bit (candidates))->op, 0);
+ fprintf (dump_file, " %d\n", nr_candidates);
+ }
+
+ EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
+ {
+ enum tree_code dcode;
+ tree oe1def;
+
+ oe1 = VEC_index (operand_entry_t, *ops, i);
+ oe1def = SSA_NAME_DEF_STMT (oe1->op);
+ dcode = TREE_CODE (GIMPLE_STMT_OPERAND (oe1def, 1));
+
+ EXECUTE_IF_SET_IN_SBITMAP (candidates, i + 1, j, sbi1)
+ {
+ tree oe2def;
+ tree op10, op11, op20, op21, tmp, tmpvar, op;
+ block_stmt_iterator bsi;
+ use_operand_p use_p;
+ tree use_stmt;
+
+ oe2 = VEC_index (operand_entry_t, *ops, j);
+ oe2def = SSA_NAME_DEF_STMT (oe2->op);
+ if (TREE_CODE (GIMPLE_STMT_OPERAND (oe2def, 1)) != dcode)
+ continue;
+
+ op10 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe1def, 1), 0);
+ op11 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe1def, 1), 1);
+ op20 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe2def, 1), 0);
+ op21 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe2def, 1), 1);
+ if (op10 == op20
+ && dcode != RDIV_EXPR)
+ {
+ tmp = op10; op10 = op11; op11 = tmp;
+ tmp = op20; op20 = op21; op21 = tmp;
+ }
+ else if (op10 == op21
+ && dcode != RDIV_EXPR)
+ {
+ tmp = op10; op10 = op11; op11 = tmp;
+ }
+ else if (op11 == op20
+ && dcode != RDIV_EXPR)
+ {
+ tmp = op20; op20 = op21; op21 = tmp;
+ }
+ else if (op11 == op21)
+ ;
+ else
+ continue;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "applying reverse distributive law to ");
+ print_generic_expr (dump_file, oe1def, 0);
+ fprintf (dump_file, " and ");
+ print_generic_expr (dump_file, oe2def, 0);
+ fprintf (dump_file, "\n");
+ }
+
+ /* Build two new stmts to avoid assigning different values to
+ names than in the original program (for debugging) and to
+ make sure not to break dominator relationships. */
+ tmpvar = create_tmp_var (TREE_TYPE (op10), NULL);
+ add_referenced_var (tmpvar);
+ if (stmt_dominates_stmt_p (oe1def, oe2def))
+ bsi = bsi_for_stmt (oe2def);
+ else
+ bsi = bsi_for_stmt (oe1def);
+
+ tmp = build_gimple_modify_stmt (NULL_TREE,
+ fold_build2 (opcode, TREE_TYPE (op10),
+ op10, op20));
+ op = make_ssa_name (tmpvar, tmp);
+ GIMPLE_STMT_OPERAND (tmp, 0) = op;
+ bsi_insert_before (&bsi, tmp, BSI_SAME_STMT);
+ update_stmt (tmp);
+
+ tmp = build_gimple_modify_stmt (NULL_TREE,
+ fold_build2 (dcode, TREE_TYPE (op10),
+ op, op11));
+ op = make_ssa_name (tmpvar, tmp);
+ GIMPLE_STMT_OPERAND (tmp, 0) = op;
+ bsi_insert_before (&bsi, tmp, BSI_SAME_STMT);
+ update_stmt (tmp);
+
+ /* Remove the immediate uses of the old stmts and replace one
+ with the new product. */
+ gcc_assert (has_single_use (oe1->op)
+ && has_single_use (oe2->op));
+ single_imm_use (oe1->op, &use_p, &use_stmt);
+ SET_USE (use_p, op);
+ update_stmt (use_stmt);
+ single_imm_use (oe2->op, &use_p, &use_stmt);
+ SET_USE (use_p,
+ fold_convert (TREE_TYPE (oe2->op), integer_zero_node));
+ update_stmt (use_stmt);
+
+ /* Now remove the old stmts defining the summands. */
+ bsi = bsi_for_stmt (oe1def);
+ bsi_remove (&bsi, true);
+ release_defs (oe1def);
+ bsi = bsi_for_stmt (oe2def);
+ bsi_remove (&bsi, true);
+ release_defs (oe2def);
+
+ /* Finally recurse with the operands removed. */
+ VEC_unordered_remove (operand_entry_t, *ops, j);
+ VEC_unordered_remove (operand_entry_t, *ops, i);
+ add_to_ops_vec (ops, op);
+ sbitmap_free (candidates);
+ if (nr_candidates > 2)
+ undistribute_ops_list (opcode, ops, loop);
+ return;
+ }
+ }
+
+ sbitmap_free (candidates);
+ }
+
/* Perform various identities and other optimizations on the list of
operand entries, stored in OPS. The tree code for the binary
operation between all the operands is OPCODE. */
*************** should_break_up_subtract (tree stmt)
*** 1093,1099 ****
if (TREE_CODE (lhs) == SSA_NAME
&& (immusestmt = get_single_immediate_use (lhs))
! && TREE_CODE (GIMPLE_STMT_OPERAND (immusestmt, 1)) == PLUS_EXPR)
return true;
return false;
--- 1267,1274 ----
if (TREE_CODE (lhs) == SSA_NAME
&& (immusestmt = get_single_immediate_use (lhs))
! && (TREE_CODE (GIMPLE_STMT_OPERAND (immusestmt, 1)) == PLUS_EXPR
! || TREE_CODE (GIMPLE_STMT_OPERAND (immusestmt, 1)) == MULT_EXPR))
return true;
return false;
*************** reassociate_bb (basic_block bb)
*** 1355,1366 ****
--- 1530,1545 ----
TREE_VISITED (stmt) = 1;
linearize_expr_tree (&ops, stmt);
+
qsort (VEC_address (operand_entry_t, ops),
VEC_length (operand_entry_t, ops),
sizeof (operand_entry_t),
sort_by_operand_rank);
optimize_ops_list (TREE_CODE (rhs), &ops);
+ undistribute_ops_list (TREE_CODE (rhs), &ops,
+ loop_containing_stmt (stmt));
+
if (VEC_length (operand_entry_t, ops) == 1)
{
if (dump_file && (dump_flags & TDF_DETAILS))
Index: gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c 2008-05-15 17:14:18.000000000 +0200
***************
*** 0 ****
--- 1,23 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-tree-reassoc1" } */
+
+ int test1 (int x, int y, int z, int weight)
+ {
+ int tmp1 = x * weight;
+ int tmp2 = y * weight;
+ int tmp3 = (x - y) * weight;
+ return tmp1 + (tmp2 + tmp3);
+ }
+
+ int test2 (int x, int y, int z, int weight)
+ {
+ int tmp1 = x * weight;
+ int tmp2 = y * weight * weight;
+ int tmp3 = z * weight * weight * weight;
+ return tmp1 + tmp2 + tmp3;
+ }
+
+ /* There should be one multiplication left in test1 and three in test2. */
+
+ /* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" } } */
+ /* { dg-final { cleanup-tree-dump "reassoc1" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c 2008-05-15 17:14:18.000000000 +0200
***************
*** 0 ****
--- 1,19 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-tree-reassoc1" } */
+
+ int test3 (int x, int y, int z, int weight, int w1, int w2, int w3)
+ {
+ int wtmp1 = w1 * weight;
+ int wtmp2 = w2 * weight;
+ int wtmp3 = w3 * weight;
+ int tmp1 = x * wtmp1;
+ int tmp2 = y * wtmp2;
+ int tmp3 = z * wtmp3;
+ return tmp1 + tmp2 + tmp3;
+ }
+
+ /* The multiplication with weight should be un-distributed.
+ ??? This pattern is not recognized currently. */
+
+ /* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" { xfail *-*-* } } } */
+ /* { dg-final { cleanup-tree-dump "reassoc1" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c 2008-05-15 17:14:18.000000000 +0200
***************
*** 0 ****
--- 1,16 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */
+
+ double test1 (double x, double y, double z, double weight)
+ {
+ double tmp1 = x / weight;
+ double tmp2 = y / weight;
+ double tmp3 = -x / weight;
+ return tmp1 + tmp2 + tmp3;
+ }
+
+ /* The division should be un-distributed and all references to x should
+ be gone. */
+
+ /* { dg-final { scan-tree-dump-times "/" 1 "reassoc1" } } */
+ /* { dg-final { cleanup-tree-dump "reassoc1" } } */
Index: gcc/testsuite/gcc.dg/torture/reassoc-1.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/torture/reassoc-1.c 2008-05-15 17:14:18.000000000 +0200
***************
*** 0 ****
--- 1,24 ----
+ /* { dg-do run } */
+
+ int x;
+
+ int __attribute__((noinline))
+ foo(int a, int b, int w)
+ {
+ int tmp1 = a * w;
+ int tmp2 = b * w;
+ x = tmp1;
+ return tmp1 + tmp2;
+ }
+
+ extern void abort (void);
+
+ int main()
+ {
+ if (foo(1, 2, 3) != 9)
+ abort ();
+ if (x != 3)
+ abort ();
+ return 0;
+ }
+
Index: gcc/testsuite/gcc.dg/tree-ssa/recip-2.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/recip-2.c.orig 2008-05-14 17:27:08.000000000 +0200
--- gcc/testsuite/gcc.dg/tree-ssa/recip-2.c 2008-05-15 17:14:18.000000000 +0200
***************
*** 1,7 ****
/* { dg-do compile } */
/* { dg-options "-O1 -funsafe-math-optimizations -fdump-tree-recip" } */
! float e(float a, float b, float c, float d, float e, float f)
{
if (a < b)
{
--- 1,9 ----
/* { dg-do compile } */
/* { dg-options "-O1 -funsafe-math-optimizations -fdump-tree-recip" } */
! float u, v, w, x, y, z;
!
! void e(float a, float b, float c, float d, float e, float f)
{
if (a < b)
{
*************** float e(float a, float b, float c, float
*** 20,26 ****
/* This should not be left as a multiplication. */
c = 1 / c;
! return a + b + c + d + e + f;
}
/* { dg-final { scan-tree-dump-times " / " 2 "recip" } } */
--- 22,33 ----
/* This should not be left as a multiplication. */
c = 1 / c;
! u = a;
! v = b;
! w = c;
! x = d;
! y = e;
! z = f;
}
/* { dg-final { scan-tree-dump-times " / " 2 "recip" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/recip-6.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/recip-6.c.orig 2008-05-14 17:27:08.000000000 +0200
--- gcc/testsuite/gcc.dg/tree-ssa/recip-6.c 2008-05-15 17:14:18.000000000 +0200
***************
*** 5,11 ****
extern int f2();
! double f1(double y, double z, double w)
{
double b, c, d, e, f;
--- 5,13 ----
extern int f2();
! double m, n, o;
!
! void f1(double y, double z, double w)
{
double b, c, d, e, f;
*************** double f1(double y, double z, double w)
*** 18,24 ****
e = c / y;
f = 1 / y;
! return d + e + f;
}
/* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
--- 20,28 ----
e = c / y;
f = 1 / y;
! m = d;
! n = e;
! o = f;
}
/* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/recip-7.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/recip-7.c.orig 2008-05-14 17:27:08.000000000 +0200
--- gcc/testsuite/gcc.dg/tree-ssa/recip-7.c 2008-05-15 17:14:18.000000000 +0200
***************
*** 5,11 ****
extern double h();
! double f(int x, double z, double w)
{
double b, c, d, e, f;
double y = h ();
--- 5,13 ----
extern double h();
! double m, n, o;
!
! void f(int x, double z, double w)
{
double b, c, d, e, f;
double y = h ();
*************** double f(int x, double z, double w)
*** 19,25 ****
e = c / y;
f = 1 / y;
! return d + e + f;
}
/* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
--- 21,29 ----
e = c / y;
f = 1 / y;
! m = d;
! n = e;
! o = f;
}
/* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */