[PATCH] Implement un-distribution of multiplication and division in the re-association pass
Richard Guenther
rguenther@suse.de
Tue Apr 29 16:38:00 GMT 2008
Currently only fold can optimize a * b + c * b to (a + c) * b saving one
multiplication. The tree re-association pass can be extended to do this
"un-distribution" thereby integrating re-association and un-distribution
tightly enough to also optimize cases like
int test1 (int x, int y, int z, int weight)
{
int tmp1 = x * weight;
int tmp2 = y * weight;
int tmp3 = (x - y) * weight;
return tmp1 + (tmp2 + tmp3);
}
I realize this patch makes tree-ssa-reassoc.c more like a weak
tree-combiner, but un-distribution is closely enough related to
re-association that I didn't care. The integration of this (and further
similar) optimization might also benefit from an onverhaul of the
passes internal data structures, but it fits the current implementation
of the existing optimizations well enough.
Bootstrapped and tested on x86_64-unknown-linux-gnu, ok for mainline?
Thanks,
Richard.
2008-04-29 Richard Guenther <rguenther@suse.de>
PR tree-optimization/15255
* tree-ssa-reassoc.c (undistribute_ops_list): Perform
un-distribution of multiplication and division on the
chain of summands.
(should_break_up_subtract): Also break up subtracts for
factors.
(reassociate_bb): Call undistribute_ops_list.
* gcc.dg/tree-ssa/reassoc-14.c: New testcase.
* gcc.dg/tree-ssa/reassoc-15.c: Likewise.
* gcc.dg/tree-ssa/reassoc-16.c: Likewise.
Index: gcc/tree-ssa-reassoc.c
===================================================================
*** gcc/tree-ssa-reassoc.c.orig 2008-04-28 17:23:24.000000000 +0200
--- gcc/tree-ssa-reassoc.c 2008-04-29 11:32:03.000000000 +0200
*************** eliminate_using_constants (enum tree_cod
*** 722,727 ****
--- 722,848 ----
}
}
+ /* Perform un-distribution of operations. A * X + B * X -> (A + B) * X. */
+
+ static void
+ undistribute_ops_list (enum tree_code opcode,
+ VEC (operand_entry_t, heap) **ops)
+ {
+ unsigned int length = VEC_length (operand_entry_t, *ops);
+ operand_entry_t oe1, oe2;
+ unsigned i, j;
+
+ if (length <= 1
+ || opcode != PLUS_EXPR)
+ return;
+
+ for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe1); ++i)
+ {
+ enum tree_code dcode;
+ tree oe1def;
+
+ if (TREE_CODE (oe1->op) != SSA_NAME)
+ continue;
+ oe1def = SSA_NAME_DEF_STMT (oe1->op);
+ if (TREE_CODE (oe1def) != GIMPLE_MODIFY_STMT)
+ continue;
+ dcode = TREE_CODE (GIMPLE_STMT_OPERAND (oe1def, 1));
+ if (dcode != MULT_EXPR
+ && dcode != RDIV_EXPR)
+ continue;
+
+ for (j = i + 1; VEC_iterate (operand_entry_t, *ops, j, oe2); ++j)
+ {
+ tree oe2def;
+ tree op10, op11, op20, op21, tmp, tmpvar, op;
+ block_stmt_iterator bsi;
+
+ if (TREE_CODE (oe2->op) != SSA_NAME)
+ continue;
+ oe2def = SSA_NAME_DEF_STMT (oe2->op);
+ if (TREE_CODE (oe2def) != GIMPLE_MODIFY_STMT)
+ continue;
+ if (TREE_CODE (GIMPLE_STMT_OPERAND (oe2def, 1)) != dcode)
+ continue;
+
+ op10 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe1def, 1), 0);
+ op11 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe1def, 1), 1);
+ op20 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe2def, 1), 0);
+ op21 = TREE_OPERAND (GIMPLE_STMT_OPERAND (oe2def, 1), 1);
+ if (op10 == op20
+ && dcode != RDIV_EXPR)
+ {
+ tmp = op10; op10 = op11; op11 = tmp;
+ tmp = op20; op20 = op21; op21 = tmp;
+ }
+ else if (op10 == op21
+ && dcode != RDIV_EXPR)
+ {
+ tmp = op10; op10 = op11; op11 = tmp;
+ }
+ else if (op11 == op20
+ && dcode != RDIV_EXPR)
+ {
+ tmp = op20; op20 = op21; op21 = tmp;
+ }
+ else if (op11 == op21)
+ ;
+ else
+ continue;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "applying reverse distributive law to ");
+ print_generic_expr (dump_file, oe1def, 0);
+ fprintf (dump_file, " and ");
+ print_generic_expr (dump_file, oe2def, 0);
+ fprintf (dump_file, "\n");
+ }
+
+ /* Remove the immediate uses of the old stmts. */
+ GIMPLE_STMT_OPERAND (oe1def, 1)
+ = fold_convert (TREE_TYPE (op10), integer_zero_node);
+ update_stmt (oe1def);
+ GIMPLE_STMT_OPERAND (oe2def, 1)
+ = fold_convert (TREE_TYPE (op20), integer_zero_node);
+ update_stmt (oe2def);
+
+ /* Build two new stmts to avoid assigning different values to
+ names than in the original program (for debugging) and to
+ make sure not to break dominator relationships. */
+ tmpvar = create_tmp_var (TREE_TYPE (op10), NULL);
+ add_referenced_var (tmpvar);
+ if (stmt_dominates_stmt_p (oe1def, oe2def))
+ bsi = bsi_for_stmt (oe2def);
+ else
+ bsi = bsi_for_stmt (oe1def);
+
+ tmp = build_gimple_modify_stmt (NULL_TREE,
+ fold_build2 (opcode, TREE_TYPE (op10),
+ op10, op20));
+ op = make_ssa_name (tmpvar, tmp);
+ GIMPLE_STMT_OPERAND (tmp, 0) = op;
+ bsi_insert_before (&bsi, tmp, BSI_SAME_STMT);
+ update_stmt (tmp);
+
+ tmp = build_gimple_modify_stmt (NULL_TREE,
+ fold_build2 (dcode, TREE_TYPE (op10),
+ op, op11));
+ op = make_ssa_name (tmpvar, tmp);
+ GIMPLE_STMT_OPERAND (tmp, 0) = op;
+ bsi_insert_before (&bsi, tmp, BSI_SAME_STMT);
+ update_stmt (tmp);
+
+ /* Finally recurse with the operands removed. */
+ VEC_unordered_remove (operand_entry_t, *ops, j);
+ VEC_unordered_remove (operand_entry_t, *ops, i);
+ add_to_ops_vec (ops, op);
+ undistribute_ops_list (opcode, ops);
+ return;
+ }
+ }
+ }
+
/* Perform various identities and other optimizations on the list of
operand entries, stored in OPS. The tree code for the binary
operation between all the operands is OPCODE. */
*************** should_break_up_subtract (tree stmt)
*** 1093,1099 ****
if (TREE_CODE (lhs) == SSA_NAME
&& (immusestmt = get_single_immediate_use (lhs))
! && TREE_CODE (GIMPLE_STMT_OPERAND (immusestmt, 1)) == PLUS_EXPR)
return true;
return false;
--- 1214,1221 ----
if (TREE_CODE (lhs) == SSA_NAME
&& (immusestmt = get_single_immediate_use (lhs))
! && (TREE_CODE (GIMPLE_STMT_OPERAND (immusestmt, 1)) == PLUS_EXPR
! || TREE_CODE (GIMPLE_STMT_OPERAND (immusestmt, 1)) == MULT_EXPR))
return true;
return false;
*************** reassociate_bb (basic_block bb)
*** 1355,1366 ****
--- 1477,1491 ----
TREE_VISITED (stmt) = 1;
linearize_expr_tree (&ops, stmt);
+
qsort (VEC_address (operand_entry_t, ops),
VEC_length (operand_entry_t, ops),
sizeof (operand_entry_t),
sort_by_operand_rank);
optimize_ops_list (TREE_CODE (rhs), &ops);
+ undistribute_ops_list (TREE_CODE (rhs), &ops);
+
if (VEC_length (operand_entry_t, ops) == 1)
{
if (dump_file && (dump_flags & TDF_DETAILS))
Index: gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c 2008-04-28 17:31:41.000000000 +0200
***************
*** 0 ****
--- 1,23 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-tree-reassoc1" } */
+
+ int test1 (int x, int y, int z, int weight)
+ {
+ int tmp1 = x * weight;
+ int tmp2 = y * weight;
+ int tmp3 = (x - y) * weight;
+ return tmp1 + (tmp2 + tmp3);
+ }
+
+ int test2 (int x, int y, int z, int weight)
+ {
+ int tmp1 = x * weight;
+ int tmp2 = y * weight * weight;
+ int tmp3 = z * weight * weight * weight;
+ return tmp1 + tmp2 + tmp3;
+ }
+
+ /* There should be one multiplication left in test1 and three in test2. */
+
+ /* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" } } */
+ /* { dg-final { cleanup-tree-dump "reassoc1" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c 2008-04-29 11:28:38.000000000 +0200
***************
*** 0 ****
--- 1,19 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -fdump-tree-reassoc1" } */
+
+ int test3 (int x, int y, int z, int weight, int w1, int w2, int w3)
+ {
+ int wtmp1 = w1 * weight;
+ int wtmp2 = w2 * weight;
+ int wtmp3 = w3 * weight;
+ int tmp1 = x * wtmp1;
+ int tmp2 = y * wtmp2;
+ int tmp3 = z * wtmp3;
+ return tmp1 + tmp2 + tmp3;
+ }
+
+ /* The multiplication with weight should be un-distributed.
+ ??? This pattern is not recognized currently. */
+
+ /* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" { xfail *-*-* } } } */
+ /* { dg-final { cleanup-tree-dump "reassoc1" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c 2008-04-29 11:37:09.000000000 +0200
***************
*** 0 ****
--- 1,16 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */
+
+ double test1 (double x, double y, double z, double weight)
+ {
+ double tmp1 = x / weight;
+ double tmp2 = y / weight;
+ double tmp3 = -x / weight;
+ return tmp1 + tmp2 + tmp3;
+ }
+
+ /* The division should be un-distributed and all references to x should
+ be gone. */
+
+ /* { dg-final { scan-tree-dump-times "/" 1 "reassoc1" } } */
+ /* { dg-final { cleanup-tree-dump "reassoc1" } } */
More information about the Gcc-patches
mailing list