[PATCH] Optimize const1 * copysign (const2, y) in reassoc (PR tree-optimization/67815)

Richard Biener <rguenther@suse.de>
Wed Oct 14 09:10:00 GMT 2015


On Tue, 13 Oct 2015, Marek Polacek wrote:

> This patch implements the copysign optimization for reassoc I promised
> I'd look into.  I.e.,
> 
> CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0
> CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0
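> 
> Concretely, at the source level (mirroring the new tests below):
> 
>    7.5 * __builtin_copysignf (2.0, x)  ->   __builtin_copysignf (15.0, x)
>   -7.5 * __builtin_copysignf (2.0, x)  ->  -__builtin_copysignf (15.0, x)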
> 
> After getting familiar with reassoc a bit this wasn't that hard.  But
> I'm hopeless when it comes to floating-point stuff, so I'd appreciate it
> if you could glance over the tests.  The reassoc-40.c test should address
> Joseph's comment in the audit trail (with -fno-rounding-math the
> optimization would take place).
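> 
> To make that concrete, here is a standalone illustration, not part of
> the patch (it assumes C99 <fenv.h> and compiling with -frounding-math):
> 
>   #include <fenv.h>
> 
>   /* 1.2 * 1.1 is inexact in binary floating point, so the rounded
>      product depends on the dynamic rounding mode; folding it at
>      compile time would bake in round-to-nearest.  */
>   double
>   mul_upward (void)
>   {
>     volatile double a = 1.2, b = 1.1;
>     fesetround (FE_UPWARD);
>     return a * b;  /* one ulp above the FE_DOWNWARD result */
>   }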
> 
> For 0.0 * copysign (cst, x), the result is folded into 0.0 way before
> reassoc, so we probably don't have to pay attention to this case.
> 
> Bootstrapped/regtested on x86_64-linux, ok for trunk?
> 
> 2015-10-13  Marek Polacek  <polacek@redhat.com>
> 
> 	PR tree-optimization/67815
> 	* tree-ssa-reassoc.c (attempt_builtin_copysign): New function.
> 	(reassociate_bb): Call it.
> 
> 	* gcc.dg/tree-ssa/reassoc-39.c: New test.
> 	* gcc.dg/tree-ssa/reassoc-40.c: New test.
> 
> diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
> index e69de29..589d06b 100644
> --- gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
> +++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
> @@ -0,0 +1,41 @@
> +/* PR tree-optimization/67815 */
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-reassoc1-details" } */
> +
> +float
> +f0 (float x)
> +{
> +  return 7.5 * __builtin_copysignf (2.0, x);
> +}
> +
> +float
> +f1 (float x)
> +{
> +  return -7.5 * __builtin_copysignf (2.0, x);
> +}
> +
> +double
> +f2 (double x, double y)
> +{
> +  return x * ((1.0/12) * __builtin_copysign (1.0, y));
> +}
> +
> +double
> +f3 (double x, double y)
> +{
> +  return (x * (-1.0/12)) * __builtin_copysign (1.0, y);
> +}
> +
> +double
> +f4 (double x, double y, double z)
> +{
> +  return (x * z) * ((1.0/12) * __builtin_copysign (4.0, y));
> +}
> +
> +double
> +f5 (double x, double y, double z)
> +{
> +  return (x * (-1.0/12)) * z * __builtin_copysign (2.0, y);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Optimizing copysign" 6 "reassoc1" } } */
> diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
> index e69de29..d65bcc1b 100644
> --- gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
> +++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
> @@ -0,0 +1,21 @@
> +/* PR tree-optimization/67815 */
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -frounding-math -fdump-tree-reassoc1-details" } */
> +
> +/* Test that the copysign reassoc optimization doesn't fire for
> +   -frounding-math (i.e. HONOR_SIGN_DEPENDENT_ROUNDING) if the multiplication
> +   is inexact.  */
> +
> +double
> +f1 (double y)
> +{
> +  return (1.2 * __builtin_copysign (1.1, y));
> +}
> +
> +double
> +f2 (double y)
> +{
> +  return (-1.2 * __builtin_copysign (1.1, y));
> +}
> +
> +/* { dg-final { scan-tree-dump-not "Optimizing copysign" "reassoc1" } } */
> diff --git gcc/tree-ssa-reassoc.c gcc/tree-ssa-reassoc.c
> index 879722e..b8897b7 100644
> --- gcc/tree-ssa-reassoc.c
> +++ gcc/tree-ssa-reassoc.c
> @@ -4622,6 +4622,95 @@ attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
>    return result;
>  }
>  
> +/* Attempt to optimize
> +   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
> +   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
> +
> +static void
> +attempt_builtin_copysign (vec<operand_entry *> *ops)
> +{
> +  operand_entry *oe;
> +  unsigned int i;
> +  unsigned int length = ops->length ();
> +  tree cst1 = ops->last ()->op;
> +
> +  if (length == 1 || TREE_CODE (cst1) != REAL_CST)
> +    return;
> +
> +  FOR_EACH_VEC_ELT (*ops, i, oe)
> +    {
> +      if (TREE_CODE (oe->op) == SSA_NAME)

I think you need to check that the SSA_NAME has a single use, since
you are changing its value.  That also means you shouldn't be
"reusing" it (existing debug stmts would then be wrong); you have to
replace it instead.
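
A minimal sketch of that guard, assuming the loop structure in the
patch (has_single_use is the existing inline from ssa-iterators.h):

  if (TREE_CODE (oe->op) == SSA_NAME
      && has_single_use (oe->op))
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
      /* Transform as before, but build a new call and a new SSA name
	 instead of mutating DEF_STMT in place, so the old name (and
	 any debug stmts mentioning it) keeps its meaning.  */
    }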

> +	{
> +	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
> +	  if (is_gimple_call (def_stmt))
> +	    {
> +	      tree fndecl = gimple_call_fndecl (def_stmt);
> +	      /* Indirect calls have no fndecl to inspect.  */
> +	      if (fndecl == NULL_TREE)
> +		continue;
> +	      tree cst2;
> +	      switch (DECL_FUNCTION_CODE (fndecl))
> +		{
> +		CASE_FLT_FN (BUILT_IN_COPYSIGN):
> +		  cst2 = gimple_call_arg (def_stmt, 0);
> +		  /* The first argument of copysign must be a constant,
> +		     otherwise there's nothing to do.  */
> +		  if (TREE_CODE (cst2) == REAL_CST)
> +		    {
> +		      tree mul = const_binop (MULT_EXPR, TREE_TYPE (cst1),
> +					      cst1, cst2);
> +		      /* If we couldn't fold to a single constant, skip it.  */
> +		      if (mul == NULL_TREE)
> +			break;
> +		      /* We're going to replace the copysign argument with
> +			 the multiplication product.  Remove the constant.  */
> +		      ops->pop ();
> +		      gimple_call_set_arg (def_stmt, 0, mul);
> +		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst1));
> +		      /* Handle the CST1 < 0 case -- negate the result.  */
> +		      if (cst1_neg)
> +			{
> +			  tree lhs = gimple_call_lhs (def_stmt);
> +			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
> +			  gimple *negate_stmt
> +			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
> +			  insert_stmt_after (negate_stmt, def_stmt);
> +			  ops->ordered_remove (i);
> +			  add_to_ops_vec (ops, negrhs);

Why use ordered_remove and add_to_ops_vec here?  Just replace the entry?
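
I.e., keep the operand entry and only change its op; a sketch, reusing
the names from the patch (OE already points at the entry for the
copysign result):

      if (cst1_neg)
	{
	  tree lhs = gimple_call_lhs (def_stmt);
	  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
	  gimple *negate_stmt
	    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
	  insert_stmt_after (negate_stmt, def_stmt);
	  /* Replace the entry in place instead of remove + re-add.  */
	  oe->op = negrhs;
	}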

Thanks,
Richard.

> +			}
> +		      if (dump_file && (dump_flags & TDF_DETAILS))
> +			{
> +			  fprintf (dump_file, "Optimizing copysign: ");
> +			  print_generic_expr (dump_file, cst1, 0);
> +			  fprintf (dump_file, " * ");
> +			  print_generic_expr (dump_file,
> +					      gimple_call_fn (def_stmt), 0);
> +			  fprintf (dump_file, " (");
> +			  print_generic_expr (dump_file, cst2, 0);
> +			  fprintf (dump_file, ", ");
> +			  print_generic_expr (dump_file,
> +					      gimple_call_arg (def_stmt, 1),
> +					      0);
> +			  fprintf (dump_file, ") into %s",
> +				   cst1_neg ? "-" : "");
> +			  print_generic_expr (dump_file,
> +					      gimple_call_fn (def_stmt), 0);
> +			  fprintf (dump_file, " (");
> +			  print_generic_expr (dump_file, mul, 0);
> +			  fprintf (dump_file, ", ");
> +			  print_generic_expr (dump_file,
> +					      gimple_call_arg (def_stmt, 1),
> +					      0);
> +			  fprintf (dump_file, ")\n");
> +			}
> +		      return;
> +		    }
> +		  break;
> +		default:
> +		  break;
> +		}
> +	    }
> +	}
> +    }
> +}
> +
>  /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
>  
>  static void
> @@ -4764,6 +4853,9 @@ reassociate_bb (basic_block bb)
>  	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
>  		optimize_range_tests (rhs_code, &ops);
>  
> +	      if (rhs_code == MULT_EXPR)
> +		attempt_builtin_copysign (&ops);
> +
>  	      if (first_pass_instance
>  		  && rhs_code == MULT_EXPR
>  		  && flag_unsafe_math_optimizations)
> 
> 	Marek
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)


