This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Optimize const1 * copysign (const2, y) in reassoc (PR tree-optimization/67815)
- From: Marek Polacek <polacek at redhat dot com>
- To: Richard Biener <rguenther at suse dot de>
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>, Jakub Jelinek <jakub at redhat dot com>, Joseph Myers <joseph at codesourcery dot com>
- Date: Wed, 14 Oct 2015 14:20:20 +0200
- Subject: Re: [PATCH] Optimize const1 * copysign (const2, y) in reassoc (PR tree-optimization/67815)
- Authentication-results: sourceware.org; auth=none
- References: <20151013162849 dot GF13672 at redhat dot com> <alpine dot LSU dot 2 dot 11 dot 1510141105190 dot 29931 at zhemvz dot fhfr dot qr>
On Wed, Oct 14, 2015 at 11:10:38AM +0200, Richard Biener wrote:
> > + FOR_EACH_VEC_ELT (*ops, i, oe)
> > + {
> > + if (TREE_CODE (oe->op) == SSA_NAME)
>
> I think you need to check whether the SSA_NAME has a single use only
> as you are changing its value. Which also means you shouldn't be
> "reusing" it (because existing debug stmts will then be wrong).
> Thus you have to replace it.
Changed as per our discussion on IRC. I'm building a new call while
the old one is going to be cleaned up by subsequent DCE.
> > + ops->ordered_remove (i);
> > + add_to_ops_vec (ops, negrhs);
>
> Why use ordered remove and add_to_ops_vec here? Just replace the entry?
Fixed.
I also added a new test.
Bootstrapped/regtested on x86_64-linux, ok for trunk?
2015-10-14 Marek Polacek <polacek@redhat.com>
PR tree-optimization/67815
* tree-ssa-reassoc.c (attempt_builtin_copysign): New function.
(reassociate_bb): Call it.
* gcc.dg/tree-ssa/reassoc-39.c: New test.
* gcc.dg/tree-ssa/reassoc-40.c: New test.
* gcc.dg/tree-ssa/reassoc-41.c: New test.
diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
index e69de29..589d06b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
@@ -0,0 +1,41 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-reassoc1-details" } */
+
+float
+f0 (float x)
+{
+ return 7.5 * __builtin_copysignf (2.0, x);
+}
+
+float
+f1 (float x)
+{
+ return -7.5 * __builtin_copysignf (2.0, x);
+}
+
+double
+f2 (double x, double y)
+{
+ return x * ((1.0/12) * __builtin_copysign (1.0, y));
+}
+
+double
+f3 (double x, double y)
+{
+ return (x * (-1.0/12)) * __builtin_copysign (1.0, y);
+}
+
+double
+f4 (double x, double y, double z)
+{
+ return (x * z) * ((1.0/12) * __builtin_copysign (4.0, y));
+}
+
+double
+f5 (double x, double y, double z)
+{
+ return (x * (-1.0/12)) * z * __builtin_copysign (2.0, y);
+}
+
+/* { dg-final { scan-tree-dump-times "Optimizing copysign" 6 "reassoc1"} }*/
diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
index e69de29..d65bcc1b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -frounding-math -fdump-tree-reassoc1-details" } */
+
+/* Test that the copysign reassoc optimization doesn't fire for
+ -frounding-math (i.e. HONOR_SIGN_DEPENDENT_ROUNDING) if the multiplication
+ is inexact. */
+
+double
+f1 (double y)
+{
+ return (1.2 * __builtin_copysign (1.1, y));
+}
+
+double
+f2 (double y)
+{
+ return (-1.2 * __builtin_copysign (1.1, y));
+}
+
+/* { dg-final { scan-tree-dump-not "Optimizing copysign" "reassoc1" } } */
diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-41.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-41.c
index e69de29..8a18b88 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-41.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-41.c
@@ -0,0 +1,21 @@
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fno-rounding-math -fdump-tree-reassoc1-details" } */
+
+/* Test that the copysign reassoc optimization does fire for
+ -fno-rounding-math (i.e. HONOR_SIGN_DEPENDENT_ROUNDING) if the multiplication
+ is inexact. */
+
+double
+f1 (double y)
+{
+ return (1.2 * __builtin_copysign (1.1, y));
+}
+
+double
+f2 (double y)
+{
+ return (-1.2 * __builtin_copysign (1.1, y));
+}
+
+/* { dg-final { scan-tree-dump-times "Optimizing copysign" 2 "reassoc1"} }*/
diff --git gcc/tree-ssa-reassoc.c gcc/tree-ssa-reassoc.c
index 879722e..62438dd 100644
--- gcc/tree-ssa-reassoc.c
+++ gcc/tree-ssa-reassoc.c
@@ -4622,6 +4622,102 @@ attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
return result;
}
+/* Attempt to optimize
+ CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
+ CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
+
+static void
+attempt_builtin_copysign (vec<operand_entry *> *ops)
+{
+ operand_entry *oe;
+ unsigned int i;
+ unsigned int length = ops->length ();
+ tree cst = ops->last ()->op;
+
+ if (length == 1 || TREE_CODE (cst) != REAL_CST)
+ return;
+
+ FOR_EACH_VEC_ELT (*ops, i, oe)
+ {
+ if (TREE_CODE (oe->op) == SSA_NAME
+ && has_single_use (oe->op))
+ {
+ gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
+ if (is_gimple_call (def_stmt))
+ {
+ tree fndecl = gimple_call_fndecl (def_stmt);
+ tree arg0, arg1;
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ CASE_FLT_FN (BUILT_IN_COPYSIGN):
+ arg0 = gimple_call_arg (def_stmt, 0);
+ arg1 = gimple_call_arg (def_stmt, 1);
+ /* The first argument of copysign must be a constant,
+ otherwise there's nothing to do. */
+ if (TREE_CODE (arg0) == REAL_CST)
+ {
+ tree mul = const_binop (MULT_EXPR, TREE_TYPE (cst),
+ cst, arg0);
+ /* If we couldn't fold to a single constant, skip it.
+ That happens e.g. for inexact multiplication when
+ -frounding-math. */
+ if (mul == NULL_TREE)
+ break;
+ /* Instead of adjusting the old DEF_STMT, let's build
+ a new call to not leak the LHS and prevent keeping
+ bogus debug statements. DCE will clean up the old
+ call. */
+ gcall *call = gimple_build_call (fndecl, 2, mul, arg1);
+ tree lhs = make_ssa_name (TREE_TYPE (arg0));
+ gimple_call_set_lhs (call, lhs);
+ gimple_set_location (call, gimple_location (def_stmt));
+ insert_stmt_after (call, def_stmt);
+ /* We've used the constant, get rid of it. */
+ ops->pop ();
+ bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
+ /* Handle the CST1 < 0 case by negating the result. */
+ if (cst1_neg)
+ {
+ tree negrhs = make_ssa_name (TREE_TYPE (lhs));
+ gimple *negate_stmt
+ = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
+ insert_stmt_after (negate_stmt, call);
+ oe->op = negrhs;
+ }
+ else
+ oe->op = lhs;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Optimizing copysign: ");
+ print_generic_expr (dump_file, cst, 0);
+ fprintf (dump_file, " * ");
+ print_generic_expr (dump_file,
+ gimple_call_fn (def_stmt), 0);
+ fprintf (dump_file, " (");
+ print_generic_expr (dump_file, arg0, 0);
+ fprintf (dump_file, ", ");
+ print_generic_expr (dump_file, arg1, 0);
+ fprintf (dump_file, ") into %s",
+ cst1_neg ? "-" : "");
+ print_generic_expr (dump_file,
+ gimple_call_fn (def_stmt), 0);
+ fprintf (dump_file, " (");
+ print_generic_expr (dump_file, mul, 0);
+ fprintf (dump_file, ", ");
+ print_generic_expr (dump_file, arg1, 0);
+ fprintf (dump_file, "\n");
+ }
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+}
+
/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
static void
@@ -4764,6 +4860,9 @@ reassociate_bb (basic_block bb)
if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
optimize_range_tests (rhs_code, &ops);
+ if (rhs_code == MULT_EXPR)
+ attempt_builtin_copysign (&ops);
+
if (first_pass_instance
&& rhs_code == MULT_EXPR
&& flag_unsafe_math_optimizations)
Marek