This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.



Re: [PATCH][1/n] Remove GENERIC stmt combining from SCCVN


On Thu, 25 Jun 2015, Richard Biener wrote:

> 
> This moves a first pattern from fold-const.c, albeit a very twisted
> one.  It triggers in gcc.dg/tree-ssa/pr52631.c.
> 
> I've done a nearly 1:1 translation: cut & paste, convert the code
> structure to withs/ifs, and then replace the tree ops accordingly.
> 
> One might question whether the narrowing case would better be split
> out generically, that is, convert X & CST to a narrowed X if CST masks
> out the upper bits (independent of the operation generating X).
> 
> I'll leave any such improvements for followups.
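
(For illustration only, not part of this patch: assuming a 32-bit
unsigned int, such a generic narrowing would turn

    unsigned narrow (unsigned x, unsigned y)
    {
      /* The constant masks out the upper 16 bits, so X can be
         truncated to a 16-bit type and zero-extended again,
         regardless of how X was computed.  */
      return (x + y) & 0xffff;
    }

into the equivalent of (unsigned) (unsigned short) (x + y).)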
> 
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Applied with the following additional change:

        * gcc.dg/tree-ssa/pr52631.c: Disable forwprop.

Index: gcc/testsuite/gcc.dg/tree-ssa/pr52631.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr52631.c     (revision 224893)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr52631.c     (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-fre1-details" } */
+/* { dg-options "-O2 -fno-tree-forwprop -fdump-tree-fre1-details" } */
 
 unsigned f(unsigned a)
 {

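Disabling forwprop presumably ensures that FRE1, whose dump the test
scans, is the pass that performs the simplification.  For reference, an
illustrative example (not the actual testcase body) of what the moved
pattern does, assuming a 32-bit unsigned int:

    unsigned widen_mask (unsigned x)
    {
      /* The upper 16 bits of x >> 16 are known zero, so the mask can
         be widened to 0xffffffff, after which the AND is redundant
         and removed by a follow-up fold.  */
      return (x >> 16) & 0x1ffff;   /* simplifies to x >> 16 */
    }

    unsigned known_zero (unsigned x)
    {
      /* Every bit the mask keeps is a known-zero bit of x << 16,
         so the whole expression folds to 0.  */
      return (x << 16) & 0xff00;    /* simplifies to 0 */
    }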

> Richard.
> 
> 2015-06-25  Richard Biener  <rguenther@suse.de>
> 
> 	* fold-const.c (fold_binary_loc): Move simplification of
> 	(X <<>> C1) & C2 ...
> 	* match.pd: ... here.
> 
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c	(revision 224893)
> +++ gcc/fold-const.c	(working copy)
> @@ -11516,106 +11516,6 @@ fold_binary_loc (location_t loc,
>  	    return build_int_cst (type, residue & low);
>  	}
>  
> -      /* Fold (X << C1) & C2 into (X << C1) & (C2 | ((1 << C1) - 1))
> -	      (X >> C1) & C2 into (X >> C1) & (C2 | ~((type) -1 >> C1))
> -	 if the new mask might be further optimized.  */
> -      if ((TREE_CODE (arg0) == LSHIFT_EXPR
> -	   || TREE_CODE (arg0) == RSHIFT_EXPR)
> -	  && TYPE_PRECISION (TREE_TYPE (arg0)) <= HOST_BITS_PER_WIDE_INT
> -	  && TREE_CODE (arg1) == INTEGER_CST
> -	  && tree_fits_uhwi_p (TREE_OPERAND (arg0, 1))
> -	  && tree_to_uhwi (TREE_OPERAND (arg0, 1)) > 0
> -	  && (tree_to_uhwi (TREE_OPERAND (arg0, 1))
> -	      < TYPE_PRECISION (TREE_TYPE (arg0))))
> -	{
> -	  unsigned int shiftc = tree_to_uhwi (TREE_OPERAND (arg0, 1));
> -	  unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg1);
> -	  unsigned HOST_WIDE_INT newmask, zerobits = 0;
> -	  tree shift_type = TREE_TYPE (arg0);
> -
> -	  if (TREE_CODE (arg0) == LSHIFT_EXPR)
> -	    zerobits = ((((unsigned HOST_WIDE_INT) 1) << shiftc) - 1);
> -	  else if (TREE_CODE (arg0) == RSHIFT_EXPR
> -		   && TYPE_PRECISION (TREE_TYPE (arg0))
> -		      == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (arg0))))
> -	    {
> -	      prec = TYPE_PRECISION (TREE_TYPE (arg0));
> -	      tree arg00 = TREE_OPERAND (arg0, 0);
> -	      /* See if more bits can be proven as zero because of
> -		 zero extension.  */
> -	      if (TREE_CODE (arg00) == NOP_EXPR
> -		  && TYPE_UNSIGNED (TREE_TYPE (TREE_OPERAND (arg00, 0))))
> -		{
> -		  tree inner_type = TREE_TYPE (TREE_OPERAND (arg00, 0));
> -		  if (TYPE_PRECISION (inner_type)
> -		      == GET_MODE_PRECISION (TYPE_MODE (inner_type))
> -		      && TYPE_PRECISION (inner_type) < prec)
> -		    {
> -		      prec = TYPE_PRECISION (inner_type);
> -		      /* See if we can shorten the right shift.  */
> -		      if (shiftc < prec)
> -			shift_type = inner_type;
> -		      /* Otherwise X >> C1 is all zeros, so we'll optimize
> -			 it into (X, 0) later on by making sure zerobits
> -			 is all ones.  */
> -		    }
> -		}
> -	      zerobits = ~(unsigned HOST_WIDE_INT) 0;
> -	      if (shiftc < prec)
> -		{
> -		  zerobits >>= HOST_BITS_PER_WIDE_INT - shiftc;
> -		  zerobits <<= prec - shiftc;
> -		}
> -	      /* For arithmetic shift if sign bit could be set, zerobits
> -		 can contain actually sign bits, so no transformation is
> -		 possible, unless MASK masks them all away.  In that
> -		 case the shift needs to be converted into logical shift.  */
> -	      if (!TYPE_UNSIGNED (TREE_TYPE (arg0))
> -		  && prec == TYPE_PRECISION (TREE_TYPE (arg0)))
> -		{
> -		  if ((mask & zerobits) == 0)
> -		    shift_type = unsigned_type_for (TREE_TYPE (arg0));
> -		  else
> -		    zerobits = 0;
> -		}
> -	    }
> -
> -	  /* ((X << 16) & 0xff00) is (X, 0).  */
> -	  if ((mask & zerobits) == mask)
> -	    return omit_one_operand_loc (loc, type,
> -					 build_int_cst (type, 0), arg0);
> -
> -	  newmask = mask | zerobits;
> -	  if (newmask != mask && (newmask & (newmask + 1)) == 0)
> -	    {
> -	      /* Only do the transformation if NEWMASK is some integer
> -		 mode's mask.  */
> -	      for (prec = BITS_PER_UNIT;
> -		   prec < HOST_BITS_PER_WIDE_INT; prec <<= 1)
> -		if (newmask == (((unsigned HOST_WIDE_INT) 1) << prec) - 1)
> -		  break;
> -	      if (prec < HOST_BITS_PER_WIDE_INT
> -		  || newmask == ~(unsigned HOST_WIDE_INT) 0)
> -		{
> -		  tree newmaskt;
> -
> -		  if (shift_type != TREE_TYPE (arg0))
> -		    {
> -		      tem = fold_build2_loc (loc, TREE_CODE (arg0), shift_type,
> -					 fold_convert_loc (loc, shift_type,
> -							   TREE_OPERAND (arg0, 0)),
> -					 TREE_OPERAND (arg0, 1));
> -		      tem = fold_convert_loc (loc, type, tem);
> -		    }
> -		  else
> -		    tem = op0;
> -		  newmaskt = build_int_cst_type (TREE_TYPE (op1), newmask);
> -		  if (!tree_int_cst_equal (newmaskt, arg1))
> -		    return fold_build2_loc (loc, BIT_AND_EXPR, type, tem, newmaskt);
> -		}
> -	    }
> -	}
> -
>        goto associate;
>  
>      case RDIV_EXPR:
> Index: gcc/match.pd
> ===================================================================
> --- gcc/match.pd	(revision 224893)
> +++ gcc/match.pd	(working copy)
> @@ -738,6 +755,97 @@ (define_operator_list swapped_tcc_compar
>  	&& wi::eq_p (wi::lshift (@0, cand), @2))
>      (cmp @1 { build_int_cst (TREE_TYPE (@1), cand); })))))
>  
> +/* Fold (X << C1) & C2 into (X << C1) & (C2 | ((1 << C1) - 1))
> +        (X >> C1) & C2 into (X >> C1) & (C2 | ~((type) -1 >> C1))
> +   if the new mask might be further optimized.  */
> +(for shift (lshift rshift)
> + (simplify
> +  (bit_and (convert?@4 (shift@5 (convert1?@3 @0) INTEGER_CST@1)) INTEGER_CST@2)
> +   (if (tree_nop_conversion_p (TREE_TYPE (@4), TREE_TYPE (@5))
> +	&& TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
> +	&& tree_fits_uhwi_p (@1)
> +	&& tree_to_uhwi (@1) > 0
> +	&& tree_to_uhwi (@1) < TYPE_PRECISION (type))
> +    (with
> +     {
> +       unsigned int shiftc = tree_to_uhwi (@1);
> +       unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (@2);
> +       unsigned HOST_WIDE_INT newmask, zerobits = 0;
> +       tree shift_type = TREE_TYPE (@3);
> +       unsigned int prec;
> +
> +       if (shift == LSHIFT_EXPR)
> +	 zerobits = ((((unsigned HOST_WIDE_INT) 1) << shiftc) - 1);
> +       else if (shift == RSHIFT_EXPR
> +		&& (TYPE_PRECISION (shift_type)
> +		    == GET_MODE_PRECISION (TYPE_MODE (shift_type))))
> +	 {
> +	   prec = TYPE_PRECISION (TREE_TYPE (@3));
> +	   tree arg00 = @0;
> +	   /* See if more bits can be proven as zero because of
> +	      zero extension.  */
> +	   if (@3 != @0
> +	       && TYPE_UNSIGNED (TREE_TYPE (@0)))
> +	     {
> +	       tree inner_type = TREE_TYPE (@0);
> +	       if ((TYPE_PRECISION (inner_type)
> +		    == GET_MODE_PRECISION (TYPE_MODE (inner_type)))
> +		   && TYPE_PRECISION (inner_type) < prec)
> +		 {
> +		   prec = TYPE_PRECISION (inner_type);
> +		   /* See if we can shorten the right shift.  */
> +		   if (shiftc < prec)
> +		     shift_type = inner_type;
> +		   /* Otherwise X >> C1 is all zeros, so we'll optimize
> +		      it into (X, 0) later on by making sure zerobits
> +		      is all ones.  */
> +		 }
> +	     }
> +	   zerobits = ~(unsigned HOST_WIDE_INT) 0;
> +	   if (shiftc < prec)
> +	     {
> +	       zerobits >>= HOST_BITS_PER_WIDE_INT - shiftc;
> +	       zerobits <<= prec - shiftc;
> +	     }
> +	   /* For arithmetic shift if sign bit could be set, zerobits
> +	      can contain actually sign bits, so no transformation is
> +	      possible, unless MASK masks them all away.  In that
> +	      case the shift needs to be converted into logical shift.  */
> +	   if (!TYPE_UNSIGNED (TREE_TYPE (@3))
> +	       && prec == TYPE_PRECISION (TREE_TYPE (@3)))
> +	     {
> +	       if ((mask & zerobits) == 0)
> +		 shift_type = unsigned_type_for (TREE_TYPE (@3));
> +	       else
> +		 zerobits = 0;
> +	     }
> +	 }
> +     }
> +     /* ((X << 16) & 0xff00) is (X, 0).  */
> +     (if ((mask & zerobits) == mask)
> +      { build_int_cst (type, 0); })
> +     (with { newmask = mask | zerobits; }
> +      (if (newmask != mask && (newmask & (newmask + 1)) == 0)
> +       (with
> +	{
> +	  /* Only do the transformation if NEWMASK is some integer
> +	     mode's mask.  */
> +	  for (prec = BITS_PER_UNIT;
> +	       prec < HOST_BITS_PER_WIDE_INT; prec <<= 1)
> +	    if (newmask == (((unsigned HOST_WIDE_INT) 1) << prec) - 1)
> +	      break;
> +	}
> +	(if (prec < HOST_BITS_PER_WIDE_INT
> +	     || newmask == ~(unsigned HOST_WIDE_INT) 0)
> +	 (with
> +	  { tree newmaskt = build_int_cst_type (TREE_TYPE (@2), newmask); }
> +	  (if (!tree_int_cst_equal (newmaskt, @2))
> +	   (if (shift_type != TREE_TYPE (@3)
> +		&& single_use (@4) && single_use (@5))
> +	    (bit_and (convert (shift:shift_type (convert @3) @1)) { newmaskt; }))
> +	   (if (shift_type == TREE_TYPE (@3))
> +	    (bit_and @4 { newmaskt; }))))))))))))
> +
>  /* Simplifications of conversions.  */
>  
>  /* Basic strip-useless-type-conversions / strip_nops.  */
> 
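
As a self-contained illustration (not GCC code), the zerobits/newmask
computation for the unsigned right-shift case looks roughly like the
sketch below, assuming a 32-bit operand and a 64-bit unsigned
HOST_WIDE_INT; it only mirrors the mask arithmetic, not the type
bookkeeping (and the real code further checks that the widened mask
matches some integer mode's precision):

    #include <stdint.h>
    #include <stdio.h>

    /* Return the widened mask for (X >> SHIFTC) & MASK, where X has
       PREC value bits, or MASK itself if no widening is possible.
       A return value of zero means every bit MASK keeps is a
       known-zero bit, i.e. the whole expression folds to 0.  */
    static uint64_t
    widen_shift_mask (unsigned shiftc, uint64_t mask, unsigned prec)
    {
      /* Bits of X >> SHIFTC that are known to be zero.  */
      uint64_t zerobits = ~(uint64_t) 0;
      if (shiftc < prec)
        {
          zerobits >>= 64 - shiftc;
          zerobits <<= prec - shiftc;
        }

      /* ((X >> SHIFTC) & MASK) is 0 if MASK only keeps known-zero bits.  */
      if ((mask & zerobits) == mask)
        return 0;

      /* Widen the mask, but only if the result is of the form
         2^n - 1, so that a later fold can take advantage of it.  */
      uint64_t newmask = mask | zerobits;
      if (newmask != mask && (newmask & (newmask + 1)) == 0)
        return newmask;
      return mask;
    }

    int
    main (void)
    {
      /* (x >> 16) & 0x1ffff on a 32-bit x: prints 0xffffffff,
         i.e. the AND becomes redundant.  */
      printf ("%#llx\n",
              (unsigned long long) widen_shift_mask (16, 0x1ffff, 32));
      return 0;
    }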

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Dilip Upmanyu, Graham Norton, HRB 21284 (AG Nuernberg)

