This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] do not lower a/b to a*(1/b)
Dan, could PRE strength reduction catch this "optimization"
that we'd miss with Paolo's patch?
No, I already did it in a separate pass. It is run only with
flag_unsafe_math_optimizations, so the cost is tolerable and it's an
occasion for me to learn about immediate uses. I'm waiting for mainline
to be bootstrappable again, but in the meanwhile here is a prototype
patch, tested by building cc1 on top of yesterday's snapshot and testing
the two attached testcases.
I took the occasion to implement some foldings of / in fold-const.c.
Ok if it passes bootstrap and regtesting for all languages?
Paolo
2005-05-13 Paolo Bonzini <bonzini@gnu.org>
* tree-ssa.c (gate_cse_reciprocals, execute_cse_reciprocals,
pass_cse_reciprocals): New.
* fold-const.c (distribute_real_division): New.
(fold_binary): Use it.
* tree-pass.h (pass_cse_reciprocals): New.
* tree-optimize.c (init_tree_optimization_passes): Run it.
* Makefile.in: Adjust dependencies.
2005-05-13 Paolo Bonzini <bonzini@gnu.org>
* testsuite/gcc.dg/fold-div-1.c, testsuite/gcc.dg/recip-1.c: New.
Index: tree-ssa.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-ssa.c,v
retrieving revision 2.94
diff -p -u -u -r2.94 tree-ssa.c
--- tree-ssa.c 8 May 2005 15:07:22 -0000 2.94
+++ tree-ssa.c 13 May 2005 16:13:16 -0000
@@ -28,6 +28,7 @@ Boston, MA 02111-1307, USA. */
#include "tm_p.h"
#include "ggc.h"
#include "langhooks.h"
+#include "real.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
@@ -1222,4 +1223,114 @@ struct tree_opt_pass pass_late_warn_unin
0, /* todo_flags_finish */
0 /* letter */
};
-
+
+
+/* A mini-pass that tries to CSE reciprocal operations. These are
+ common in sequences such as this one:
+
+ modulus = sqrt(x*x + y*y + z*z);
+ x = x / modulus;
+ y = y / modulus;
+ z = z / modulus;
+
+ that can be optimized to
+
+ modulus = sqrt(x*x + y*y + z*z);
+ rmodulus = 1.0 / modulus;
+ x = x * rmodulus;
+ y = y * rmodulus;
+ z = z * rmodulus;
+
+ We do this for loop invariant divisors, and within a single basic
+ block in this function. */
+static bool
+gate_cse_reciprocals (void)
+{
+ return optimize && !optimize_size && flag_unsafe_math_optimizations;
+}
+
+static void
+execute_cse_reciprocals (void)
+{
+ block_stmt_iterator bsi;
+ basic_block bb;
+ def_operand_p def_p;
+ use_operand_p use_p;
+ ssa_op_iter def_iter;
+ imm_use_iterator use_iter;
+
+ FOR_EACH_BB (bb)
+ {
+ for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+ {
+ tree stmt = bsi_stmt (bsi), type;
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ continue;
+ type = TREE_TYPE (TREE_OPERAND (stmt, 0));
+ if (!FLOAT_TYPE_P (type))
+ continue;
+
+ FOR_EACH_PHI_OR_STMT_DEF(def_p, stmt, def_iter, SSA_OP_DEF)
+ {
+ tree t, new_stmt, def = DEF_FROM_PTR (def_p);
+ int count = 0;
+ /* Find uses. */
+ FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
+ {
+ tree use_stmt = USE_STMT (use_p);
+ if (TREE_CODE (use_stmt) == MODIFY_EXPR
+ && TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
+ && TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def)
+ count++;
+ if (count == 2)
+ break;
+ }
+
+ if (count < 2)
+ continue;
+
+ /* Make a variable with the replacement and substitute it. */
+ t = make_rename_temp (type, "reciptmp");
+ new_stmt = build2 (MODIFY_EXPR, void_type_node, t,
+ fold_build2 (RDIV_EXPR, type,
+ build_real (type, dconst1),
+ TREE_OPERAND (stmt, 0)));
+
+ SET_EXPR_LOCUS (new_stmt, EXPR_LOCUS (stmt));
+ TREE_BLOCK (new_stmt) = TREE_BLOCK (stmt);
+ bsi_insert_after (&bsi, new_stmt, BSI_NEW_STMT);
+
+ FOR_EACH_IMM_USE_SAFE (use_p, use_iter, def)
+ {
+ tree use_stmt = USE_STMT (use_p);
+ if (use_stmt != new_stmt
+ && TREE_CODE (use_stmt) == MODIFY_EXPR
+ && TREE_CODE (TREE_OPERAND (use_stmt, 1)) == RDIV_EXPR
+ && TREE_OPERAND (TREE_OPERAND (use_stmt, 1), 1) == def)
+ {
+ TREE_CODE (TREE_OPERAND (use_stmt, 1)) = MULT_EXPR;
+ SET_USE (use_p, t);
+ }
+ }
+ }
+ }
+ }
+}
+
+struct tree_opt_pass pass_cse_reciprocals =
+{
+ "recip", /* name */
+ gate_cse_reciprocals, /* gate */
+ execute_cse_reciprocals, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ 0, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
+ | TODO_verify_stmts, /* todo_flags_finish */
+ 0 /* letter */
+};
Index: fold-const.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/fold-const.c,v
retrieving revision 1.578
diff -p -u -u -r1.578 fold-const.c
--- fold-const.c 11 May 2005 15:21:23 -0000 1.578
+++ fold-const.c 13 May 2005 16:13:17 -0000
@@ -3076,6 +3076,46 @@ distribute_bit_expr (enum tree_code code
return fold_build2 (TREE_CODE (arg0), type, common,
fold_build2 (code, type, left, right));
}
+
+/* Knowing that ARG0 and ARG1 are both RDIV_EXPRs, simplify a binary operation
+ with code CODE. This optimization is unsafe. */
+static tree
+distribute_real_division (enum tree_code code, tree type, tree arg0, tree arg1)
+{
+ bool mul0 = TREE_CODE (arg0) == MULT_EXPR;
+ bool mul1 = TREE_CODE (arg1) == MULT_EXPR;
+
+ /* (A / C) +- (B / C) -> (A +- B) / C. */
+ if (mul0 == mul1
+ && operand_equal_p (TREE_OPERAND (arg0, 1),
+ TREE_OPERAND (arg1, 1), 0))
+ return fold_build2 (mul0 ? MULT_EXPR : RDIV_EXPR, type,
+ fold_build2 (code, type,
+ TREE_OPERAND (arg0, 0),
+ TREE_OPERAND (arg1, 0)),
+ TREE_OPERAND (arg0, 1));
+
+ /* (A / C1) +- (A / C2) -> A * (1 / C1 +- 1 / C2). */
+ if (operand_equal_p (TREE_OPERAND (arg0, 0),
+ TREE_OPERAND (arg1, 0), 0)
+ && TREE_CODE (TREE_OPERAND (arg0, 1)) == REAL_CST
+ && TREE_CODE (TREE_OPERAND (arg1, 1)) == REAL_CST)
+ {
+ REAL_VALUE_TYPE r0, r1;
+ r0 = TREE_REAL_CST (TREE_OPERAND (arg0, 1));
+ r1 = TREE_REAL_CST (TREE_OPERAND (arg1, 1));
+ if (!mul0)
+ real_arithmetic (&r0, RDIV_EXPR, &dconst1, &r0);
+ if (!mul1)
+ real_arithmetic (&r1, RDIV_EXPR, &dconst1, &r1);
+ real_arithmetic (&r0, code, &r0, &r1);
+ return fold_build2 (MULT_EXPR, type,
+ TREE_OPERAND (arg0, 0),
+ build_real (type, r0));
+ }
+
+ return NULL_TREE;
+}
/* Return a BIT_FIELD_REF of type TYPE to refer to BITSIZE bits of INNER
starting at BITPOS. The field is unsigned if UNSIGNEDP is nonzero. */
@@ -7502,6 +7544,12 @@ fold_binary (enum tree_code code, tree t
fold_convert (type, tem));
}
+ if (flag_unsafe_math_optimizations
+ && (TREE_CODE (arg0) == RDIV_EXPR || TREE_CODE (arg0) == MULT_EXPR)
+ && (TREE_CODE (arg1) == RDIV_EXPR || TREE_CODE (arg1) == MULT_EXPR)
+ && (tem = distribute_real_division (code, type, arg0, arg1)))
+ return tem;
+
/* Convert x+x into x*2.0. */
if (operand_equal_p (arg0, arg1, 0)
&& SCALAR_FLOAT_TYPE_P (type))
@@ -7899,6 +7947,12 @@ fold_binary (enum tree_code code, tree t
return fold_convert (type, fold (tem));
}
+ if (flag_unsafe_math_optimizations
+ && (TREE_CODE (arg0) == RDIV_EXPR || TREE_CODE (arg0) == MULT_EXPR)
+ && (TREE_CODE (arg1) == RDIV_EXPR || TREE_CODE (arg1) == MULT_EXPR)
+ && (tem = distribute_real_division (code, type, arg0, arg1)))
+ return tem;
+
if (TREE_CODE (arg0) == MULT_EXPR
&& TREE_CODE (arg1) == MULT_EXPR
&& (!FLOAT_TYPE_P (type) || flag_unsafe_math_optimizations))
Index: tree-optimize.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-optimize.c,v
retrieving revision 2.91
diff -p -u -u -r2.91 tree-optimize.c
--- tree-optimize.c 12 May 2005 19:29:21 -0000 2.91
+++ tree-optimize.c 13 May 2005 16:13:17 -0000
@@ -402,6 +402,7 @@ init_tree_optimization_passes (void)
we add may_alias right after fold builtins
which can create arbitrary GIMPLE. */
NEXT_PASS (pass_may_alias);
+ NEXT_PASS (pass_cse_reciprocals);
NEXT_PASS (pass_split_crit_edges);
NEXT_PASS (pass_pre);
NEXT_PASS (pass_sink_code);
Index: tree-pass.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-pass.h,v
retrieving revision 2.33
diff -p -u -u -r2.33 tree-pass.h
--- tree-pass.h 21 Apr 2005 13:18:23 -0000 2.33
+++ tree-pass.h 13 May 2005 16:13:17 -0000
@@ -196,6 +196,7 @@ extern struct tree_opt_pass pass_fold_bu
extern struct tree_opt_pass pass_stdarg;
extern struct tree_opt_pass pass_early_warn_uninitialized;
extern struct tree_opt_pass pass_late_warn_uninitialized;
+extern struct tree_opt_pass pass_cse_reciprocals;
extern struct tree_opt_pass pass_warn_function_return;
extern struct tree_opt_pass pass_phiopt;
extern struct tree_opt_pass pass_forwprop;
Index: expr.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/expr.c,v
retrieving revision 1.789
diff -p -u -u -r1.789 expr.c
--- expr.c 3 May 2005 22:21:48 -0000 1.789
+++ expr.c 13 May 2005 16:13:18 -0000
@@ -7806,18 +7806,6 @@ expand_expr_real_1 (tree exp, rtx target
return expand_divmod (0, code, mode, op0, op1, target, unsignedp);
case RDIV_EXPR:
- /* Emit a/b as a*(1/b). Later we may manage CSE the reciprocal saving
- expensive divide. If not, combine will rebuild the original
- computation. */
- if (flag_unsafe_math_optimizations && optimize && !optimize_size
- && TREE_CODE (type) == REAL_TYPE
- && !real_onep (TREE_OPERAND (exp, 0)))
- return expand_expr (build2 (MULT_EXPR, type, TREE_OPERAND (exp, 0),
- build2 (RDIV_EXPR, type,
- build_real (type, dconst1),
- TREE_OPERAND (exp, 1))),
- target, tmode, modifier);
-
goto binop;
case TRUNC_MOD_EXPR:
Index: doc/passes.texi
===================================================================
RCS file: /cvs/gcc/gcc/gcc/doc/passes.texi,v
retrieving revision 1.53
diff -p -u -r1.53 passes.texi
--- doc/passes.texi 4 May 2005 17:15:30 -0000 1.53
+++ doc/passes.texi 13 May 2005 16:18:39 -0000
@@ -354,7 +354,7 @@ This pass transforms tail recursion into
This pass sinks stores and assignments down the flowgraph closer to it's
use point. The pass is located in @file{tree-ssa-sink.c} and is
-described by @code{pass_sink_code}
+described by @code{pass_sink_code}.
@item Partial redundancy elimination
@@ -362,6 +362,11 @@ This pass eliminates partially redundant
performing load motion. The pass is located in @file{tree-ssa-pre.c}
and is described by @code{pass_pre}.
+Just before partial redundancy elimination, if
+@option{-funsafe-math-optimizations} is on, GCC tries to convert
+divisions to multiplications by the reciprocal. The pass is located
+in @file{tree-ssa.c} and is described by @code{pass_cse_reciprocal}.
+
@item Loop optimization
The main driver of the pass is placed in @file{tree-ssa-loop.c}
Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.1484
diff -p -u -r1.1484 Makefile.in
--- Makefile.in 11 May 2005 16:25:24 -0000 1.1484
+++ Makefile.in 13 May 2005 16:24:33 -0000
@@ -1646,7 +1646,7 @@ tree-ssa.o : tree-ssa.c $(TREE_FLOW_H) $
errors.h toplev.h function.h $(TIMEVAR_H) $(TM_H) coretypes.h \
$(TREE_DUMP_H) langhooks.h tree-pass.h $(BASIC_BLOCK_H) bitmap.h \
$(FLAGS_H) $(GGC_H) hard-reg-set.h $(HASHTAB_H) pointer-set.h \
- $(TREE_GIMPLE_H) tree-inline.h $(VARRAY_H)
+ $(TREE_GIMPLE_H) tree-inline.h $(VARRAY_H) real.h
tree-into-ssa.o : tree-into-ssa.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
$(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) output.h $(DIAGNOSTIC_H) \
errors.h function.h $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
Index: testsuite/gcc.dg/fold-div-1.c
===================================================================
RCS file: testsuite/gcc.dg/fold-div-1.c
diff -N testsuite/gcc.dg/fold-div-1.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ testsuite/gcc.dg/fold-div-1.c 13 May 2005 16:13:18 -0000
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-funsafe-math-optimizations -fdump-tree-generic" } */
+
+float f(float x)
+{
+ return x/2 + x/3;
+}
+
+float g(float x)
+{
+ return 2/x + 3/x;
+}
+
+float h(float x)
+{
+ return x/2 - x/3;
+}
+
+float i(float x)
+{
+ return 2/x - 3/x;
+}
+
+/* f and h should be turned into multiplications,
+ the divisions in g and i should be grouped together. */
+
+/* { dg-final { scan-tree-dump-times " \\* " 2 "generic" } } */
+/* { dg-final { scan-tree-dump-times " / " 2 "generic" } } */
+/* dg-final { cleanup-tree-dump "generic" } } */
+
Index: testsuite/gcc.dg/tree-ssa/recip-1.c
===================================================================
RCS file: testsuite/gcc.dg/tree-ssa/recip-1.c
diff -N testsuite/gcc.dg/tree-ssa/recip-1.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ testsuite/gcc.dg/tree-ssa/recip-1.c 13 May 2005 16:13:18 -0000
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -funsafe-math-optimizations -fdump-tree-recip" } */
+
+float e(float *x, float *y, float *z)
+{
+ float m = __builtin_sqrt (*x * *x + *y * *y + *z * *z);
+ *x /= m;
+ *y /= m;
+ *z /= m;
+}
+
+/* Look for only one division. */
+/* { dg-final { scan-tree-dump-times "= .* /" 1 "recip" } } */
+/* { dg-final { cleanup-tree-dump "recip" } } */