This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Fix PR44214


On Thu, 19 Apr 2012, William J. Schmidt wrote:

> This enhances constant folding for division by complex and vector
> constants.  When -freciprocal-math is present, such divisions are
> converted into multiplies by the constant reciprocal.  When an exact
> reciprocal is available, this is done for vector constants when
> optimizing.  I did not implement logic for exact reciprocals of complex
> constants because either (a) the complexity doesn't justify the
> likelihood of occurrence, or (b) I'm lazy.  Your choice. ;)
> 
> Bootstrapped with no new regressions on powerpc64-unknown-linux-gnu.  Ok
> for trunk?

See below ...

> Thanks,
> Bill
> 
> 
> gcc:
> 
> 2012-04-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> 
> 	PR rtl-optimization/44214
> 	* fold-const.c (exact_inverse): New function.
> 	(fold_binary_loc): Fold vector and complex division by constant into
> 	multiply by recripocal with flag_reciprocal_math; fold vector division
> 	by constant into multiply by reciprocal with exact inverse.
> 
> gcc/testsuite:
> 
> 2012-04-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> 
> 	PR rtl-optimization/44214
> 	* gcc.target/powerpc/pr44214-1.c: New test.
> 	* gcc.dg/pr44214-2.c: Likewise.
> 	* gcc.target/powerpc/pr44214-3.c: Likewise.
> 
> 
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c	(revision 186573)
> +++ gcc/fold-const.c	(working copy)
> @@ -9693,6 +9693,48 @@ fold_addr_of_array_ref_difference (location_t loc,
>    return NULL_TREE;
>  }
>  
> +/* If the real or vector real constant CST of type TYPE has an exact
> +   inverse, return it, else return NULL.  */
> +
> +static tree
> +exact_inverse (tree type, tree cst)
> +{
> +  REAL_VALUE_TYPE r;
> +  tree unit_type, *elts;
> +  enum machine_mode mode;
> +  unsigned vec_nelts, i;
> +
> +  switch (TREE_CODE (cst))
> +    {
> +    case REAL_CST:
> +      r = TREE_REAL_CST (cst);
> +
> +      if (exact_real_inverse (TYPE_MODE (type), &r))
> +	return build_real (type, r);
> +
> +      return NULL_TREE;
> +
> +    case VECTOR_CST:
> +      vec_nelts = VECTOR_CST_NELTS (cst);
> +      elts = XALLOCAVEC (tree, vec_nelts);
> +      unit_type = TREE_TYPE (type);
> +      mode = TYPE_MODE (unit_type);
> +
> +      for (i = 0; i < vec_nelts; i++)
> +	{
> +	  r = TREE_REAL_CST (VECTOR_CST_ELT (cst, i));
> +	  if (!exact_real_inverse (mode, &r))
> +	    return NULL_TREE;
> +	  elts[i] = build_real (unit_type, r);
> +	}
> +
> +      return build_vector (type, elts);
> +
> +    default:
> +      return NULL_TREE;
> +    }
> +}
> +
>  /* Fold a binary expression of code CODE and type TYPE with operands
>     OP0 and OP1.  LOC is the location of the resulting expression.
>     Return the folded expression if folding is successful.  Otherwise,
> @@ -11734,23 +11776,25 @@ fold_binary_loc (location_t loc,
>  	 so only do this if -freciprocal-math.  We can actually
>  	 always safely do it if ARG1 is a power of two, but it's hard to
>  	 tell if it is or not in a portable manner.  */
> -      if (TREE_CODE (arg1) == REAL_CST)
> +      if (TREE_CODE (arg1) == REAL_CST
> +	  || (TREE_CODE (arg1) == COMPLEX_CST
> +	      && COMPLEX_FLOAT_TYPE_P (TREE_TYPE (arg1)))
> +	  || (TREE_CODE (arg1) == VECTOR_CST
> +	      && VECTOR_FLOAT_TYPE_P (TREE_TYPE (arg1))))
>  	{
>  	  if (flag_reciprocal_math
> -	      && 0 != (tem = const_binop (code, build_real (type, dconst1),
> +	      && 0 != (tem = fold_binary (code, type, build_one_cst (type),
>  					  arg1)))

Any reason for not using const_binop?

>  	    return fold_build2_loc (loc, MULT_EXPR, type, arg0, tem);
> -	  /* Find the reciprocal if optimizing and the result is exact.  */
> -	  if (optimize)
> +	  /* Find the reciprocal if optimizing and the result is exact.
> +	     TODO: Complex reciprocal not implemented.  */
> +	  if (optimize
> +	      && TREE_CODE (arg1) != COMPLEX_CST)

I know this is all pre-existing, but really the flag_reciprocal_math
case should be under if (optimize), too.  So, can you move this check
to the toplevel covering both cases?

The testcases should apply to generic vectors, too, and should scan
the .original dump (where folding first applied).  So they should
not be target specific (and they should use -freciprocal-math).

Ok with those changes.

Thanks,
Richard.

>  	    {
> -	      REAL_VALUE_TYPE r;
> -	      r = TREE_REAL_CST (arg1);
> -	      if (exact_real_inverse (TYPE_MODE(TREE_TYPE(arg0)), &r))
> -		{
> -		  tem = build_real (type, r);
> -		  return fold_build2_loc (loc, MULT_EXPR, type,
> -				      fold_convert_loc (loc, type, arg0), tem);
> -		}
> +	      tree inverse = exact_inverse (TREE_TYPE (arg0), arg1);
> +
> +	      if (inverse)
> +		return fold_build2_loc (loc, MULT_EXPR, type, arg0, inverse);
>  	    }
>  	}
>        /* Convert A/B/C to A/(B*C).  */
> Index: gcc/testsuite/gcc.target/powerpc/pr44214-3.c
> ===================================================================
> --- gcc/testsuite/gcc.target/powerpc/pr44214-3.c	(revision 0)
> +++ gcc/testsuite/gcc.target/powerpc/pr44214-3.c	(revision 0)
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=power7 -fdump-tree-optimized" } */
> +
> +void do_div (vector double *a, vector double *b)
> +{
> +  *a = *b / (vector double) { 2.0, 2.0 };
> +}
> +
> +/* Since 2.0 has an exact reciprocal, constant folding should multiply *b
> +   by the reciprocals of the vector elements.  As a result there should be
> +   one vector multiply and zero divides in the optimized code.  The string
> +   " * " occurs 3 times: one multiply and two indirect parameters.  */
> +
> +/* { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " / " 0 "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
> Index: gcc/testsuite/gcc.target/powerpc/pr44214-1.c
> ===================================================================
> --- gcc/testsuite/gcc.target/powerpc/pr44214-1.c	(revision 0)
> +++ gcc/testsuite/gcc.target/powerpc/pr44214-1.c	(revision 0)
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ffast-math -mcpu=power7 -fdump-tree-optimized" } */
> +
> +void do_div (vector double *a, vector double *b)
> +{
> +  *a = *b / (vector double) { 2.0, 3.0 };
> +}
> +
> +/* Constant folding should multiply *b by the reciprocals of the
> +   vector elements.  As a result there should be one vector multiply
> +   and zero divides in the optimized code.  The string " * " occurs
> +   3 times: one multiply and two indirect parameters.  */
> +
> +/* { dg-final { scan-tree-dump-times " \\\* " 3 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " / " 0 "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
> Index: gcc/testsuite/gcc.dg/pr44214-2.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/pr44214-2.c	(revision 0)
> +++ gcc/testsuite/gcc.dg/pr44214-2.c	(revision 0)
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */
> +
> +void do_div (_Complex double *a, _Complex double *b)
> +{
> +  *a = *b / (4.0 - 5.0fi);
> +}
> +
> +/* Constant folding should multiply *b by the reciprocal of 4-5i
> +   = 4/41 - (5/41)i.  As a result there should be 4 multiplies and
> +   zero divides in the optimized code.  The string " * " occurs 6
> +   times: 4 multiplies and 2 indirect parameters.  */
> +
> +/* { dg-final { scan-tree-dump-times " \\\* " 6 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times " / " 0 "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]