This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.



Re: [PATCH] Fix PR54245


On Tue, 14 Aug 2012, William J. Schmidt wrote:

> Currently we can insert an initializer that performs a multiply in a
> type too small for correctness.  For now, detect the problem and avoid
> the optimization when this would happen.  Eventually I will fix this up
> so that the multiply is performed in a sufficiently wide type.
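
As an illustration of the pattern in question (a sketch modeled on the test
case in the patch below, not code from the patch; the function and variable
names are made up):

    /* Both products share the short-typed operand s.  Strength reduction
       can rewrite b as a + T with an inserted initializer T = s * 5.  If
       that multiply is performed in short rather than int, it can wrap
       for large s even though the original int-typed computation did not
       overflow.  */
    int f (short s, int x)
    {
      int a = x + 10 * s;   /* ADD candidate: x + (10 * s) */
      int b = x + 15 * s;   /* ADD candidate: x + (15 * s) */
      return a + b;
    }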
> 
> Bootstrapped and tested on powerpc64-unknown-linux-gnu with no new
> regressions.  Ok for trunk?

Ok.

Thanks,
Richard.

> Thanks,
> Bill
> 
> 
> gcc:
> 
> 2012-08-14  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> 
> 	PR tree-optimization/54245
> 	* gimple-ssa-strength-reduction.c (legal_cast_p_1): New function.
> 	(legal_cast_p): Split out logic to legal_cast_p_1.
> 	(analyze_increments): Avoid introducing multiplies in smaller types.
> 
> 
> gcc/testsuite:
> 
> 2012-08-14  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
> 
> 	PR tree-optimization/54245
> 	* gcc.dg/tree-ssa/pr54245.c: New test.
> 
> 
> Index: gcc/testsuite/gcc.dg/tree-ssa/pr54245.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/pr54245.c	(revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr54245.c	(revision 0)
> @@ -0,0 +1,49 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-slsr-details" } */
> +
> +#include <stdio.h>
> +
> +#define W1  22725
> +#define W2  21407
> +#define W3  19266
> +#define W6  8867
> +
> +void idct_row(short *row, int *dst)
> +{
> +    int a0, a1, b0, b1;
> +
> +    a0 = W1 * row[0];
> +    a1 = a0;
> +
> +    a0 += W2 * row[2];
> +    a1 += W6 * row[2];
> +
> +    b0 = W1 * row[1];
> +    b1 = W3 * row[1];
> +
> +    dst[0] = a0 + b0;
> +    dst[1] = a0 - b0;
> +    dst[2] = a1 + b1;
> +    dst[3] = a1 - b1;
> +}
> +
> +static short block[8] = { 1, 2, 3, 4 };
> +
> +int main(void)
> +{
> +    int out[4];
> +    int i;
> +
> +    idct_row(block, out);
> +
> +    for (i = 0; i < 4; i++)
> +        printf("%d\n", out[i]);
> +
> +    return !(out[2] == 87858 && out[3] == 10794);
> +}
> +
> +/* For now, disable inserting an initializer when the multiplication will
> +   take place in a smaller type than originally.  This test may be deleted
> +   in future when this case is handled more precisely.  */
> +/* { dg-final { scan-tree-dump-times "Inserting initializer" 0 "slsr" } } */
> +/* { dg-final { cleanup-tree-dump "slsr" } } */
> Index: gcc/gimple-ssa-strength-reduction.c
> ===================================================================
> --- gcc/gimple-ssa-strength-reduction.c	(revision 190305)
> +++ gcc/gimple-ssa-strength-reduction.c	(working copy)
> @@ -1089,6 +1089,32 @@ slsr_process_neg (gimple gs, tree rhs1, bool speed
>    add_cand_for_stmt (gs, c);
>  }
>  
> +/* Help function for legal_cast_p, operating on two trees.  Checks
> +   whether it's allowable to cast from RHS to LHS.  See legal_cast_p
> +   for more details.  */
> +
> +static bool
> +legal_cast_p_1 (tree lhs, tree rhs)
> +{
> +  tree lhs_type, rhs_type;
> +  unsigned lhs_size, rhs_size;
> +  bool lhs_wraps, rhs_wraps;
> +
> +  lhs_type = TREE_TYPE (lhs);
> +  rhs_type = TREE_TYPE (rhs);
> +  lhs_size = TYPE_PRECISION (lhs_type);
> +  rhs_size = TYPE_PRECISION (rhs_type);
> +  lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
> +  rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
> +
> +  if (lhs_size < rhs_size
> +      || (rhs_wraps && !lhs_wraps)
> +      || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
> +    return false;
> +
> +  return true;
> +}
> +
>  /* Return TRUE if GS is a statement that defines an SSA name from
>     a conversion and is legal for us to combine with an add and multiply
>     in the candidate table.  For example, suppose we have:
> @@ -1129,28 +1155,11 @@ slsr_process_neg (gimple gs, tree rhs1, bool speed
>  static bool
>  legal_cast_p (gimple gs, tree rhs)
>  {
> -  tree lhs, lhs_type, rhs_type;
> -  unsigned lhs_size, rhs_size;
> -  bool lhs_wraps, rhs_wraps;
> -
>    if (!is_gimple_assign (gs)
>        || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
>      return false;
>  
> -  lhs = gimple_assign_lhs (gs);
> -  lhs_type = TREE_TYPE (lhs);
> -  rhs_type = TREE_TYPE (rhs);
> -  lhs_size = TYPE_PRECISION (lhs_type);
> -  rhs_size = TYPE_PRECISION (rhs_type);
> -  lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
> -  rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
> -
> -  if (lhs_size < rhs_size
> -      || (rhs_wraps && !lhs_wraps)
> -      || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
> -    return false;
> -
> -  return true;
> +  return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
>  }
>  
>  /* Given GS which is a cast to a scalar integer type, determine whether
> @@ -1996,6 +2005,31 @@ analyze_increments (slsr_cand_t first_dep, enum ma
>  		       != POINTER_PLUS_EXPR)))
>  	incr_vec[i].cost = COST_NEUTRAL;
>        
> +      /* FORNOW: If we need to add an initializer, give up if a cast from
> +	 the candidate's type to its stride's type can lose precision.
> +	 This could eventually be handled better by expressly retaining the
> +	 result of a cast to a wider type in the stride.  Example:
> +
> +           short int _1;
> +	   _2 = (int) _1;
> +	   _3 = _2 * 10;
> +	   _4 = x + _3;    ADD: x + (10 * _1) : int
> +	   _5 = _2 * 15;
> +	   _6 = x + _5;    ADD: x + (15 * _1) : int
> +
> +         Right now replacing _6 would cause insertion of an initializer
> +	 of the form "short int T = _1 * 5;" followed by a cast to 
> +	 int, which could overflow incorrectly.  Had we recorded _2 or
> +	 (int)_1 as the stride, this wouldn't happen.  However, doing
> +         this breaks other opportunities, so this will require some
> +	 care.  */
> +      else if (!incr_vec[i].initializer
> +	       && TREE_CODE (first_dep->stride) != INTEGER_CST
> +	       && !legal_cast_p_1 (first_dep->stride,
> +				   gimple_assign_lhs (first_dep->cand_stmt)))
> +
> +	incr_vec[i].cost = COST_INFINITE;
> +
>        /* For any other increment, if this is a multiply candidate, we
>  	 must introduce a temporary T and initialize it with
>  	 T_0 = stride * increment.  When optimizing for speed, walk the
> 
> 
> 

-- 
Richard Guenther <rguenther@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer
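
For reference, the rule encoded by the new legal_cast_p_1 helper can be
sketched as a standalone predicate over plain type properties (an
illustrative paraphrase, not code from the patch; the names are made up):

    /* A cast from a value of RHS's type to LHS's type is treated as
       legal only if it does not narrow, does not go from a wrapping
       (e.g. unsigned) type to a non-wrapping one, and, when both types
       wrap, does not change the precision.  */
    int cast_is_legal (unsigned lhs_prec, unsigned rhs_prec,
                       int lhs_wraps, int rhs_wraps)
    {
      if (lhs_prec < rhs_prec
          || (rhs_wraps && !lhs_wraps)
          || (rhs_wraps && lhs_wraps && rhs_prec != lhs_prec))
        return 0;
      return 1;
    }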

