[PATCH] ((A & N) + B) & M optimization (PR tree-optimization/31261, take 2)

Richard Guenther richard.guenther@gmail.com
Thu Sep 30 18:03:00 GMT 2010


On Thu, Sep 30, 2010 at 3:46 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, Sep 30, 2010 at 11:21:19AM +0200, Richard Guenther wrote:
>> Hm.  There's still nothing on GIMPLE that performs this then, right?
>
> True, it will hit probably only or mainly during FE folding.
>
>> In fact I have a hard time following the logic of the code as it tries
>> to handle the several cases without comments on what's going on ;)
>>
>> So, can you please add comments of the sort
>>
>>  /* Now we know the expression is of the form (x & b) & c ...  */
>
> Like this?

Yes.

This patch is ok.

Thanks,
Richard.

> 2010-09-30  Jakub Jelinek  <jakub@redhat.com>
>
>        PR tree-optimization/31261
>        * fold-const.c (fold_binary): Optimize ((A & N) + B) & M
>        for constants M and N, M == (1LL << cst) - 1 && (N & M) == M.
>
>        * gcc.dg/tree-ssa/pr31261.c: New test.
>
> --- gcc/fold-const.c.jj 2010-09-16 11:05:27.000000000 +0200
> +++ gcc/fold-const.c    2010-09-30 15:41:43.787396620 +0200
> @@ -11071,6 +11071,133 @@ fold_binary_loc (location_t loc,
>                              fold_convert_loc (loc, type, arg0));
>        }
>
> +      /* For constants M and N, if M == (1LL << cst) - 1 && (N & M) == M,
> +        ((A & N) + B) & M -> (A + B) & M
> +        Similarly if (N & M) == 0,
> +        ((A | N) + B) & M -> (A + B) & M
> +        and for - instead of + (or unary - instead of +)
> +        and/or ^ instead of |.
> +        If B is constant and (B & M) == 0, fold into A & M.  */
> +      if (host_integerp (arg1, 1))
> +       {
> +         unsigned HOST_WIDE_INT cst1 = tree_low_cst (arg1, 1);
> +         if (~cst1 && (cst1 & (cst1 + 1)) == 0
> +             && INTEGRAL_TYPE_P (TREE_TYPE (arg0))
> +             && (TREE_CODE (arg0) == PLUS_EXPR
> +                 || TREE_CODE (arg0) == MINUS_EXPR
> +                 || TREE_CODE (arg0) == NEGATE_EXPR)
> +             && (TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0))
> +                 || TREE_CODE (TREE_TYPE (arg0)) == INTEGER_TYPE))
> +           {
> +             tree pmop[2];
> +             int which = 0;
> +             unsigned HOST_WIDE_INT cst0;
> +
> +             /* Now we know that arg0 is (C + D) or (C - D) or
> +                -C and arg1 (M) is == (1LL << cst) - 1.
> +                Store C into PMOP[0] and D into PMOP[1].  */
> +             pmop[0] = TREE_OPERAND (arg0, 0);
> +             pmop[1] = NULL;
> +             if (TREE_CODE (arg0) != NEGATE_EXPR)
> +               {
> +                 pmop[1] = TREE_OPERAND (arg0, 1);
> +                 which = 1;
> +               }
> +
> +             if (!host_integerp (TYPE_MAX_VALUE (TREE_TYPE (arg0)), 1)
> +                 || (tree_low_cst (TYPE_MAX_VALUE (TREE_TYPE (arg0)), 1)
> +                     & cst1) != cst1)
> +               which = -1;
> +
> +             for (; which >= 0; which--)
> +               switch (TREE_CODE (pmop[which]))
> +                 {
> +                 case BIT_AND_EXPR:
> +                 case BIT_IOR_EXPR:
> +                 case BIT_XOR_EXPR:
> +                   if (TREE_CODE (TREE_OPERAND (pmop[which], 1))
> +                       != INTEGER_CST)
> +                     break;
> +                   /* tree_low_cst not used, because we don't care about
> +                      the upper bits.  */
> +                   cst0 = TREE_INT_CST_LOW (TREE_OPERAND (pmop[which], 1));
> +                   cst0 &= cst1;
> +                   if (TREE_CODE (pmop[which]) == BIT_AND_EXPR)
> +                     {
> +                       if (cst0 != cst1)
> +                         break;
> +                     }
> +                   else if (cst0 != 0)
> +                     break;
> +                   /* If C or D is of the form (A & N) where
> +                      (N & M) == M, or of the form (A | N) or
> +                      (A ^ N) where (N & M) == 0, replace it with A.  */
> +                   pmop[which] = TREE_OPERAND (pmop[which], 0);
> +                   break;
> +                 case INTEGER_CST:
> +                   /* If C or D is a N where (N & M) == 0, it can be
> +                      omitted (assumed 0).  */
> +                   if ((TREE_CODE (arg0) == PLUS_EXPR
> +                        || (TREE_CODE (arg0) == MINUS_EXPR && which == 0))
> +                       && (TREE_INT_CST_LOW (pmop[which]) & cst1) == 0)
> +                     pmop[which] = NULL;
> +                   break;
> +                 default:
> +                   break;
> +                 }
> +
> +             /* Only build anything new if we optimized one or both arguments
> +                above.  */
> +             if (pmop[0] != TREE_OPERAND (arg0, 0)
> +                 || (TREE_CODE (arg0) != NEGATE_EXPR
> +                     && pmop[1] != TREE_OPERAND (arg0, 1)))
> +               {
> +                 tree utype = type;
> +                 if (! TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0)))
> +                   {
> +                     /* Perform the operations in a type that has defined
> +                        overflow behavior.  */
> +                     utype = unsigned_type_for (type);
> +                     if (pmop[0] != NULL)
> +                       pmop[0] = fold_convert_loc (loc, utype, pmop[0]);
> +                     if (pmop[1] != NULL)
> +                       pmop[1] = fold_convert_loc (loc, utype, pmop[1]);
> +                   }
> +
> +                 if (TREE_CODE (arg0) == NEGATE_EXPR)
> +                   tem = fold_build1_loc (loc, NEGATE_EXPR, utype, pmop[0]);
> +                 else if (TREE_CODE (arg0) == PLUS_EXPR)
> +                   {
> +                     if (pmop[0] != NULL && pmop[1] != NULL)
> +                       tem = fold_build2_loc (loc, PLUS_EXPR, utype,
> +                                              pmop[0], pmop[1]);
> +                     else if (pmop[0] != NULL)
> +                       tem = pmop[0];
> +                     else if (pmop[1] != NULL)
> +                       tem = pmop[1];
> +                     else
> +                       return build_int_cst (type, 0);
> +                   }
> +                 else if (pmop[0] == NULL)
> +                   tem = fold_build1_loc (loc, NEGATE_EXPR, utype, pmop[1]);
> +                 else
> +                   tem = fold_build2_loc (loc, MINUS_EXPR, utype,
> +                                          pmop[0], pmop[1]);
> +                 /* TEM is now the new binary +, - or unary - replacement.  */
> +                 if (utype == type)
> +                   return fold_build2_loc (loc, BIT_AND_EXPR, type,
> +                                           tem, arg1);
> +                 else
> +                   {
> +                     tem = fold_build2_loc (loc, BIT_AND_EXPR, utype, tem,
> +                                            fold_convert_loc (loc, utype,
> +                                            arg1));
> +                     return fold_convert_loc (loc, type, tem);
> +                   }
> +               }
> +           }
> +       }
> +
>       t1 = distribute_bit_expr (loc, code, type, arg0, arg1);
>       if (t1 != NULL_TREE)
>        return t1;
> --- gcc/testsuite/gcc.dg/tree-ssa/pr31261.c.jj  2010-09-29 09:48:36.374396699 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr31261.c     2010-09-29 10:12:49.612377621 +0200
> @@ -0,0 +1,40 @@
> +/* PR tree-optimization/31261 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-original" } */
> +
> +unsigned int
> +f1 (unsigned int a)
> +{
> +  return (8 - (a & 7)) & 7;
> +}
> +
> +long int
> +f2 (long int b)
> +{
> +  return (16 + (b & 7)) & 15;
> +}
> +
> +char
> +f3 (char c)
> +{
> +  return -(c & 63) & 31;
> +}
> +
> +int
> +f4 (int d)
> +{
> +  return (12 - (d & 15)) & 7;
> +}
> +
> +int
> +f5 (int e)
> +{
> +  return (12 - (e & 7)) & 15;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "return -a \& 7;" 1 "original" } } */
> +/* { dg-final { scan-tree-dump-times "return b \& 7;" 1 "original" } } */
> +/* { dg-final { scan-tree-dump-times "return \\(char\\) -\\(unsigned char\\) c \& 31;" 1 "original" } } */
> +/* { dg-final { scan-tree-dump-times "return \\(int\\) \\(12 - \\(unsigned int\\) d\\) \& 7;" 1 "original" } } */
> +/* { dg-final { scan-tree-dump-times "return 12 - \\(e \& 7\\) \& 15;" 1 "original" } } */
> +/* { dg-final { cleanup-tree-dump "original" } } */
>
>        Jakub
>



More information about the Gcc-patches mailing list